From 4123ad6ca7bb7bdcb4cca65e97d30b4677233de3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 8 Jul 2024 21:51:36 -0600 Subject: [PATCH 001/357] Prepare 40.0.0 Release (#11343) --- Cargo.toml | 36 +-- datafusion-cli/Cargo.lock | 96 ++++---- datafusion-cli/Cargo.toml | 4 +- dev/changelog/40.0.0.md | 371 ++++++++++++++++++++++++++++++ docs/source/user-guide/configs.md | 2 +- 5 files changed, 440 insertions(+), 69 deletions(-) create mode 100644 dev/changelog/40.0.0.md diff --git a/Cargo.toml b/Cargo.toml index 968a74e37f10f..f87205f0d0671 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.76" -version = "39.0.0" +version = "40.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -86,23 +86,23 @@ bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "5.5.0" -datafusion = { path = "datafusion/core", version = "39.0.0", default-features = false } -datafusion-common = { path = "datafusion/common", version = "39.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "39.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "39.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "39.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "39.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "39.0.0" } -datafusion-functions-array = { path = "datafusion/functions-array", version = "39.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "39.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "39.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "39.0.0", default-features = false } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "39.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "39.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "39.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "39.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "39.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "39.0.0" } +datafusion = { path = "datafusion/core", version = "40.0.0", default-features = false } +datafusion-common = { path = "datafusion/common", version = "40.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "40.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "40.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "40.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "40.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" } +datafusion-functions-array = { path = "datafusion/functions-array", version = "40.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false } 
+datafusion-physical-plan = { path = "datafusion/physical-plan", version = "40.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "40.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "40.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "40.0.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "40.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "40.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 500e731a5b4f8..42ec5922a73fe 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -381,13 +381,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -875,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.104" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490" +checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" dependencies = [ "jobserver", "libc", @@ -1099,7 +1099,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "apache-avro", @@ -1177,7 +1177,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1204,7 +1204,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "apache-avro", @@ -1225,14 +1225,14 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "39.0.0" +version = "40.0.0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "chrono", @@ -1251,7 +1251,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "arrow", @@ -1268,7 +1268,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "base64 0.22.1", @@ -1292,7 +1292,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "arrow", @@ -1308,7 +1308,7 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "arrow-array", @@ -1327,7 +1327,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "async-trait", @@ -1345,7 +1345,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "arrow", @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" 
-version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "arrow", @@ -1385,7 +1385,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "39.0.0" +version = "40.0.0" dependencies = [ "ahash", "arrow", @@ -1417,7 +1417,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "arrow-array", @@ -1686,7 +1686,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -2008,7 +2008,7 @@ dependencies = [ "http 1.1.0", "hyper 1.4.0", "hyper-util", - "rustls 0.23.10", + "rustls 0.23.11", "rustls-native-certs 0.7.1", "rustls-pki-types", "tokio", @@ -2699,7 +2699,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -2822,7 +2822,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.10", + "rustls 0.23.11", "thiserror", "tokio", "tracing", @@ -2838,7 +2838,7 @@ dependencies = [ "rand", "ring 0.17.8", "rustc-hash", - "rustls 0.23.10", + "rustls 0.23.11", "slab", "thiserror", "tinyvec", @@ -2987,7 +2987,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.10", + "rustls 0.23.11", "rustls-native-certs 0.7.1", "rustls-pemfile 2.1.2", "rustls-pki-types", @@ -3117,9 +3117,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.10" +version = "0.23.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" +checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" dependencies = [ "once_cell", "ring 0.17.8", @@ -3296,22 +3296,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3446,7 +3446,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3492,7 +3492,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3505,7 +3505,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3527,9 +3527,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.68" +version = "2.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" dependencies = [ "proc-macro2", "quote", @@ -3592,7 +3592,7 @@ checksum = 
"46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3647,9 +3647,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6b6a2fb3a985e99cebfaefa9faa3024743da73304ca1c683a36429613d3d22" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -3687,7 +3687,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3707,7 +3707,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.10", + "rustls 0.23.11", "rustls-pki-types", "tokio", ] @@ -3784,7 +3784,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3829,7 +3829,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] @@ -3983,7 +3983,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", "wasm-bindgen-shared", ] @@ -4017,7 +4017,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4282,7 +4282,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.70", ] [[package]] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index bcacf1d52a9b9..860dc123fa94a 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "39.0.0" +version = "40.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -35,7 +35,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "39.0.0", features = [ +datafusion = { path = "../datafusion/core", version = "40.0.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/dev/changelog/40.0.0.md b/dev/changelog/40.0.0.md new file mode 100644 index 0000000000000..72143ae48b288 --- /dev/null +++ b/dev/changelog/40.0.0.md @@ -0,0 +1,371 @@ + + +# Apache DataFusion 40.0.0 Changelog + +This release consists of 263 commits from 64 contributors. See credits at the end of this changelog for more information. 
+ +**Breaking changes:** + +- Convert `StringAgg` to UDAF [#10945](https://github.com/apache/datafusion/pull/10945) (lewiszlw) +- Convert `bool_and` & `bool_or` to UDAF [#11009](https://github.com/apache/datafusion/pull/11009) (jcsherin) +- Convert Average to UDAF #10942 [#10964](https://github.com/apache/datafusion/pull/10964) (dharanad) +- fix: remove the Sized requirement on ExecutionPlan::name() [#11047](https://github.com/apache/datafusion/pull/11047) (waynexia) +- Return `&Arc` reference to inner trait object [#11103](https://github.com/apache/datafusion/pull/11103) (linhr) +- Support COPY TO Externally Defined File Formats, add FileType trait [#11060](https://github.com/apache/datafusion/pull/11060) (devinjdangelo) +- expose table name in proto extension codec [#11139](https://github.com/apache/datafusion/pull/11139) (leoyvens) +- fix(typo): unqualifed to unqualified [#11159](https://github.com/apache/datafusion/pull/11159) (waynexia) +- Consolidate `Filter::remove_aliases` into `Expr::unalias_nested` [#11001](https://github.com/apache/datafusion/pull/11001) (alamb) +- Convert `nth_value` to UDAF [#11287](https://github.com/apache/datafusion/pull/11287) (jcsherin) + +**Implemented enhancements:** + +- feat: Add support for Int8 and Int16 data types in data page statistics [#10931](https://github.com/apache/datafusion/pull/10931) (Weijun-H) +- feat: add CliSessionContext trait for cli [#10890](https://github.com/apache/datafusion/pull/10890) (tshauck) +- feat(optimizer): handle partial anchored regex cases and improve doc [#10977](https://github.com/apache/datafusion/pull/10977) (waynexia) +- feat: support uint data page extraction [#11018](https://github.com/apache/datafusion/pull/11018) (tshauck) +- feat: propagate EmptyRelation for more join types [#10963](https://github.com/apache/datafusion/pull/10963) (tshauck) +- feat: Add method to add analyzer rules to SessionContext [#10849](https://github.com/apache/datafusion/pull/10849) (pingsutw) +- feat: Support duplicate column names in Joins in Substrait consumer [#11049](https://github.com/apache/datafusion/pull/11049) (Blizzara) +- feat: Add support for Timestamp data types in data page statistics. 
[#11123](https://github.com/apache/datafusion/pull/11123) (efredine) +- feat: Add support for `Binary`/`LargeBinary`/`Utf8`/`LargeUtf8` data types in data page statistics [#11136](https://github.com/apache/datafusion/pull/11136) (PsiACE) +- feat: Support Map type in Substrait conversions [#11129](https://github.com/apache/datafusion/pull/11129) (Blizzara) +- feat: Conditionally allow to keep partition_by columns when using PARTITIONED BY enhancement [#11107](https://github.com/apache/datafusion/pull/11107) (hveiga) +- feat: enable "substring" as a UDF in addition to "substr" [#11277](https://github.com/apache/datafusion/pull/11277) (Blizzara) + +**Fixed bugs:** + +- fix: use total ordering in the min & max accumulator for floats [#10627](https://github.com/apache/datafusion/pull/10627) (westonpace) +- fix: Support double quotes in `date_part` [#10833](https://github.com/apache/datafusion/pull/10833) (Weijun-H) +- fix: Ignore nullability of list elements when consuming Substrait [#10874](https://github.com/apache/datafusion/pull/10874) (Blizzara) +- fix: Support `NOT IN ()` via anti join [#10936](https://github.com/apache/datafusion/pull/10936) (akoshchiy) +- fix: CTEs defined in a subquery can escape their scope [#10954](https://github.com/apache/datafusion/pull/10954) (jonahgao) +- fix: Fix the incorrect null joined rows for SMJ outer join with join filter [#10892](https://github.com/apache/datafusion/pull/10892) (viirya) +- fix: gcd returns negative results [#11099](https://github.com/apache/datafusion/pull/11099) (jonahgao) +- fix: LCM panicked due to overflow [#11131](https://github.com/apache/datafusion/pull/11131) (jonahgao) +- fix: Support dictionary type in parquet metadata statistics. 
[#11169](https://github.com/apache/datafusion/pull/11169) (efredine) +- fix: Ignore nullability in Substrait structs [#11130](https://github.com/apache/datafusion/pull/11130) (Blizzara) +- fix: typo in comment about FinalPhysicalPlan [#11181](https://github.com/apache/datafusion/pull/11181) (c8ef) +- fix: Support Substrait's compound names also for window functions [#11163](https://github.com/apache/datafusion/pull/11163) (Blizzara) +- fix: Incorrect LEFT JOIN evaluation result on OR conditions [#11203](https://github.com/apache/datafusion/pull/11203) (viirya) +- fix: Be more lenient in interpreting input args for builtin window functions [#11199](https://github.com/apache/datafusion/pull/11199) (Blizzara) +- fix: correctly handle Substrait windows with rows bounds (and validate executability of test plans) [#11278](https://github.com/apache/datafusion/pull/11278) (Blizzara) +- fix: When consuming Substrait, temporarily rename clashing duplicate columns [#11329](https://github.com/apache/datafusion/pull/11329) (Blizzara) + +**Documentation updates:** + +- Minor: Clarify `SessionContext::state` docs [#10847](https://github.com/apache/datafusion/pull/10847) (alamb) +- Minor: Update SIGMOD paper reference url [#10860](https://github.com/apache/datafusion/pull/10860) (alamb) +- docs(variance): Correct typos in comments [#10844](https://github.com/apache/datafusion/pull/10844) (pingsutw) +- Add missing code close tick in LiteralGuarantee docs [#10859](https://github.com/apache/datafusion/pull/10859) (adriangb) +- Minor: Add more docs and examples for `Transformed` and `TransformedResult` [#11003](https://github.com/apache/datafusion/pull/11003) (alamb) +- doc: Update links in the documantation [#11044](https://github.com/apache/datafusion/pull/11044) (Weijun-H) +- Minor: Examples cleanup + more docs in pruning example [#11086](https://github.com/apache/datafusion/pull/11086) (alamb) +- Minor: refine documentation pointing to examples [#11110](https://github.com/apache/datafusion/pull/11110) (alamb) +- Fix running in Docker instructions [#11141](https://github.com/apache/datafusion/pull/11141) (findepi) +- docs: add example for custom file format with `COPY TO` [#11174](https://github.com/apache/datafusion/pull/11174) (tshauck) +- Fix docs wordings [#11226](https://github.com/apache/datafusion/pull/11226) (findepi) +- Fix count() docs around including null values [#11293](https://github.com/apache/datafusion/pull/11293) (findepi) + +**Other:** + +- chore: Prepare 39.0.0-rc1 [#10828](https://github.com/apache/datafusion/pull/10828) (andygrove) +- Remove expr_fn::sum and replace them with function stub [#10816](https://github.com/apache/datafusion/pull/10816) (jayzhan211) +- Debug print as many fields as possible for `SessionState` [#10818](https://github.com/apache/datafusion/pull/10818) (lewiszlw) +- Prune Parquet RowGroup in a single call to `PruningPredicate::prune`, update StatisticsExtractor API [#10802](https://github.com/apache/datafusion/pull/10802) (alamb) +- Remove Built-in sum and Rename to lowercase `sum` [#10831](https://github.com/apache/datafusion/pull/10831) (jayzhan211) +- Convert `stddev` and `stddev_pop` to UDAF [#10834](https://github.com/apache/datafusion/pull/10834) (goldmedal) +- Introduce expr builder for aggregate function 
[#10560](https://github.com/apache/datafusion/pull/10560) (jayzhan211) +- chore: Improve change log generator [#10841](https://github.com/apache/datafusion/pull/10841) (andygrove) +- Support user defined `ParquetAccessPlan` in `ParquetExec`, validation to `ParquetAccessPlan::select` [#10813](https://github.com/apache/datafusion/pull/10813) (alamb) +- Convert `VariancePopulation` to UDAF [#10836](https://github.com/apache/datafusion/pull/10836) (mknaw) +- Convert `approx_median` to UDAF [#10840](https://github.com/apache/datafusion/pull/10840) (goldmedal) +- MINOR: use workspace deps in proto-common (upgrade object store dependency) [#10848](https://github.com/apache/datafusion/pull/10848) (waynexia) +- Minor: add `Window::try_new_with_schema` constructor [#10850](https://github.com/apache/datafusion/pull/10850) (sadboy) +- Add support for reading CSV files with comments [#10467](https://github.com/apache/datafusion/pull/10467) (bbannier) +- Convert approx_distinct to UDAF [#10851](https://github.com/apache/datafusion/pull/10851) (Lordworms) +- minor: add proto-common crate to release instructions [#10858](https://github.com/apache/datafusion/pull/10858) (andygrove) +- Implement TPCH substrait integration teset, support tpch_1 [#10842](https://github.com/apache/datafusion/pull/10842) (Lordworms) +- Remove unecessary passing around of `suffix: &str` in `pruning.rs`'s `RequiredColumns` [#10863](https://github.com/apache/datafusion/pull/10863) (adriangb) +- chore: Make DFSchema::datatype_is_logically_equal function public [#10867](https://github.com/apache/datafusion/pull/10867) (advancedxy) +- Bump braces from 3.0.2 to 3.0.3 in /datafusion/wasmtest/datafusion-wasm-app [#10865](https://github.com/apache/datafusion/pull/10865) (dependabot[bot]) +- Docs: Add `unnest` to SQL Reference [#10839](https://github.com/apache/datafusion/pull/10839) (gloomweaver) +- Support correct output column names and struct field names when consuming/producing Substrait [#10829](https://github.com/apache/datafusion/pull/10829) (Blizzara) +- Make Logical Plans more readable by removing extra aliases [#10832](https://github.com/apache/datafusion/pull/10832) (MohamedAbdeen21) +- Minor: Improve `ListingTable` documentation [#10854](https://github.com/apache/datafusion/pull/10854) (alamb) +- Extending join fuzz tests to support join filtering [#10728](https://github.com/apache/datafusion/pull/10728) (edmondop) +- replace and(_, not(_)) with and_not(\*) [#10885](https://github.com/apache/datafusion/pull/10885) (RTEnzyme) +- Disabling test for semi join with filters [#10887](https://github.com/apache/datafusion/pull/10887) (edmondop) +- Minor: Update `min_statistics` and `max_statistics` to be helpers, update docs [#10866](https://github.com/apache/datafusion/pull/10866) (alamb) +- Remove `Interval` column test // parquet extraction [#10888](https://github.com/apache/datafusion/pull/10888) (marvinlanhenke) +- Minor: SMJ fuzz tests fix for rowcounts [#10891](https://github.com/apache/datafusion/pull/10891) (comphead) +- Move `Count` to `functions-aggregate`, update MSRV to rust 1.75 [#10484](https://github.com/apache/datafusion/pull/10484) (jayzhan211) +- refactor: fetch statistics for a given ParquetMetaData [#10880](https://github.com/apache/datafusion/pull/10880) (NGA-TRAN) +- Move FileSinkExec::metrics to the 
correct place [#10901](https://github.com/apache/datafusion/pull/10901) (joroKr21) +- Refine ParquetAccessPlan comments and tests [#10896](https://github.com/apache/datafusion/pull/10896) (alamb) +- ci: fix clippy failures on main [#10903](https://github.com/apache/datafusion/pull/10903) (jonahgao) +- Minor: disable flaky fuzz test [#10904](https://github.com/apache/datafusion/pull/10904) (comphead) +- Remove builtin count [#10893](https://github.com/apache/datafusion/pull/10893) (jayzhan211) +- Move Regr\_\* functions to use UDAF [#10898](https://github.com/apache/datafusion/pull/10898) (eejbyfeldt) +- Docs: clarify when the parquet reader will read from object store when using cached metadata [#10909](https://github.com/apache/datafusion/pull/10909) (alamb) +- Minor: Fix `bench.sh tpch data` [#10905](https://github.com/apache/datafusion/pull/10905) (alamb) +- Minor: use venv in benchmark compare [#10894](https://github.com/apache/datafusion/pull/10894) (tmi) +- Support explicit type and name during table creation [#10273](https://github.com/apache/datafusion/pull/10273) (duongcongtoai) +- Simplify Join Partition Rules [#10911](https://github.com/apache/datafusion/pull/10911) (berkaysynnada) +- Move `Literal` to `physical-expr-common` [#10910](https://github.com/apache/datafusion/pull/10910) (lewiszlw) +- chore: update some error messages for clarity [#10916](https://github.com/apache/datafusion/pull/10916) (jeffreyssmith2nd) +- Initial Extract parquet data page statistics API [#10852](https://github.com/apache/datafusion/pull/10852) (marvinlanhenke) +- Add contains function, and support in datafusion substrait consumer [#10879](https://github.com/apache/datafusion/pull/10879) (Lordworms) +- Minor: Improve `arrow_statistics` tests [#10927](https://github.com/apache/datafusion/pull/10927) (alamb) +- Minor: Remove `prefer_hash_join` env variable for clickbench [#10933](https://github.com/apache/datafusion/pull/10933) (jayzhan211) +- Convert ApproxPercentileCont and ApproxPercentileContWithWeight to UDAF [#10917](https://github.com/apache/datafusion/pull/10917) (goldmedal) +- refactor: remove extra default in max rows [#10941](https://github.com/apache/datafusion/pull/10941) (tshauck) +- chore: Improve performance of Parquet statistics conversion [#10932](https://github.com/apache/datafusion/pull/10932) (Weijun-H) +- Add catalog::resolve_table_references [#10876](https://github.com/apache/datafusion/pull/10876) (leoyvens) +- Convert BitAnd, BitOr, BitXor to UDAF [#10930](https://github.com/apache/datafusion/pull/10930) (dharanad) +- refactor: improve PoolType argument handling for CLI [#10940](https://github.com/apache/datafusion/pull/10940) (tshauck) +- Minor: remove potential string copy from Column::from_qualified_name [#10947](https://github.com/apache/datafusion/pull/10947) (alamb) +- Fix: StatisticsConverter `counts` for missing columns [#10946](https://github.com/apache/datafusion/pull/10946) (marvinlanhenke) +- Add initial support for Utf8View and BinaryView types [#10925](https://github.com/apache/datafusion/pull/10925) (XiangpengHao) +- Use shorter aliases in CSE [#10939](https://github.com/apache/datafusion/pull/10939) (peter-toth) +- Substrait support for ParquetExec round trip for simple select [#10949](https://github.com/apache/datafusion/pull/10949) 
(xinlifoobar) +- Support to unparse `ScalarValue::IntervalMonthDayNano` to String [#10956](https://github.com/apache/datafusion/pull/10956) (goldmedal) +- Minor: Return option from row_group_row_count [#10973](https://github.com/apache/datafusion/pull/10973) (marvinlanhenke) +- Minor: Add routine to debug join fuzz tests [#10970](https://github.com/apache/datafusion/pull/10970) (comphead) +- Support to unparse `ScalarValue::TimestampNanosecond` to String [#10984](https://github.com/apache/datafusion/pull/10984) (goldmedal) +- build(deps-dev): bump ws from 8.14.2 to 8.17.1 in /datafusion/wasmtest/datafusion-wasm-app [#10988](https://github.com/apache/datafusion/pull/10988) (dependabot[bot]) +- Minor: reuse Rows buffer in GroupValuesRows [#10980](https://github.com/apache/datafusion/pull/10980) (alamb) +- Add example for writing SQL analysis using DataFusion structures [#10938](https://github.com/apache/datafusion/pull/10938) (LorrensP-2158466) +- Push down filter for Unnest plan [#10974](https://github.com/apache/datafusion/pull/10974) (jayzhan211) +- Add parquet page stats for float{16, 32, 64} [#10982](https://github.com/apache/datafusion/pull/10982) (tmi) +- Fix `file_stream_provider` example compilation failure on windows [#10975](https://github.com/apache/datafusion/pull/10975) (lewiszlw) +- Stop copying LogicalPlan and Exprs in `CommonSubexprEliminate` (2-3% planning speed improvement) [#10835](https://github.com/apache/datafusion/pull/10835) (alamb) +- chore: Update documentation link in `PhysicalOptimizerRule` comment [#11002](https://github.com/apache/datafusion/pull/11002) (Weijun-H) +- Push down filter plan for unnest on non-unnest column only [#10991](https://github.com/apache/datafusion/pull/10991) (jayzhan211) +- Minor: add test for pushdown past unnest [#11017](https://github.com/apache/datafusion/pull/11017) (alamb) +- Update docs for `protoc` minimum installed version [#11006](https://github.com/apache/datafusion/pull/11006) (jcsherin) +- propagate error instead of panicking on out of bounds in physical-expr/src/analysis.rs [#10992](https://github.com/apache/datafusion/pull/10992) (LorrensP-2158466) +- Add drop_columns to dataframe api [#11010](https://github.com/apache/datafusion/pull/11010) (Omega359) +- Push down filter plan for non-unnest column [#11019](https://github.com/apache/datafusion/pull/11019) (jayzhan211) +- Consider timezones with `UTC` and `+00:00` to be the same [#10960](https://github.com/apache/datafusion/pull/10960) (marvinlanhenke) +- Deprecate `OptimizerRule::try_optimize` [#11022](https://github.com/apache/datafusion/pull/11022) (lewiszlw) +- Relax combine partial final rule [#10913](https://github.com/apache/datafusion/pull/10913) (mustafasrepo) +- Compute gcd with u64 instead of i64 because of overflows [#11036](https://github.com/apache/datafusion/pull/11036) (LorrensP-2158466) +- Add distinct_on to dataframe api [#11012](https://github.com/apache/datafusion/pull/11012) (Omega359) +- chore: add test to show current behavior of `AT TIME ZONE` for string vs. 
timestamp [#11056](https://github.com/apache/datafusion/pull/11056) (appletreeisyellow) +- Boolean parquet get datapage stat [#11054](https://github.com/apache/datafusion/pull/11054) (LorrensP-2158466) +- Using display_name for Expr::Aggregation [#11020](https://github.com/apache/datafusion/pull/11020) (Lordworms) +- Minor: Convert `Count`'s name to lowercase [#11028](https://github.com/apache/datafusion/pull/11028) (jayzhan211) +- Minor: Move `function::Hint` to `datafusion-expr` crate to avoid physical-expr dependency for `datafusion-function` crate [#11061](https://github.com/apache/datafusion/pull/11061) (jayzhan211) +- Support to unparse ScalarValue::TimestampMillisecond to String [#11046](https://github.com/apache/datafusion/pull/11046) (pingsutw) +- Support to unparse IntervalYearMonth and IntervalDayTime to String [#11065](https://github.com/apache/datafusion/pull/11065) (goldmedal) +- SMJ: fix streaming row concurrency issue for LEFT SEMI filtered join [#11041](https://github.com/apache/datafusion/pull/11041) (comphead) +- Add `advanced_parquet_index.rs` example of index in into parquet files [#10701](https://github.com/apache/datafusion/pull/10701) (alamb) +- Add Expr::column_refs to find column references without copying [#10948](https://github.com/apache/datafusion/pull/10948) (alamb) +- Give `OptimizerRule::try_optimize` default implementation and cleanup duplicated custom implementations [#11059](https://github.com/apache/datafusion/pull/11059) (lewiszlw) +- Fix `FormatOptions::CSV` propagation [#10912](https://github.com/apache/datafusion/pull/10912) (svranesevic) +- Support parsing SQL strings to Exprs [#10995](https://github.com/apache/datafusion/pull/10995) (xinlifoobar) +- Support dictionary data type in array_to_string [#10908](https://github.com/apache/datafusion/pull/10908) (EduardoVega) +- Implement min/max for interval types [#11015](https://github.com/apache/datafusion/pull/11015) (maxburke) +- Improve LIKE performance for Dictionary arrays [#11058](https://github.com/apache/datafusion/pull/11058) (Lordworms) +- handle overflow in gcd and return this as an error [#11057](https://github.com/apache/datafusion/pull/11057) (LorrensP-2158466) +- Convert Correlation to UDAF [#11064](https://github.com/apache/datafusion/pull/11064) (pingsutw) +- Migrate more code from `Expr::to_columns` to `Expr::column_refs` [#11067](https://github.com/apache/datafusion/pull/11067) (alamb) +- decimal support for unparser [#11092](https://github.com/apache/datafusion/pull/11092) (y-f-u) +- Improve `CommonSubexprEliminate` identifier management (10% faster planning) [#10473](https://github.com/apache/datafusion/pull/10473) (peter-toth) +- Change wildcard qualifier type from `String` to `TableReference` [#11073](https://github.com/apache/datafusion/pull/11073) (linhr) +- Allow access to UDTF in `SessionContext` [#11071](https://github.com/apache/datafusion/pull/11071) (linhr) +- Strip table qualifiers from schema in `UNION ALL` for unparser [#11082](https://github.com/apache/datafusion/pull/11082) (phillipleblanc) +- Update ListingTable to use StatisticsConverter [#11068](https://github.com/apache/datafusion/pull/11068) (xinlifoobar) +- to_timestamp functions should preserve timezone [#11038](https://github.com/apache/datafusion/pull/11038) (maxburke) +- Rewrite array operator to 
function in parser [#11101](https://github.com/apache/datafusion/pull/11101) (jayzhan211) +- Resolve empty relation opt for join types [#11066](https://github.com/apache/datafusion/pull/11066) (LorrensP-2158466) +- Add composed extension codec example [#11095](https://github.com/apache/datafusion/pull/11095) (lewiszlw) +- Minor: Avoid some repetition in to_timestamp [#11116](https://github.com/apache/datafusion/pull/11116) (alamb) +- Minor: fix ScalarValue::new_ten error message (cites one not ten) [#11126](https://github.com/apache/datafusion/pull/11126) (gstvg) +- Deprecate Expr::column_refs [#11115](https://github.com/apache/datafusion/pull/11115) (alamb) +- Overflow in negate operator [#11084](https://github.com/apache/datafusion/pull/11084) (LorrensP-2158466) +- Minor: Add Architectural Goals to the docs [#11109](https://github.com/apache/datafusion/pull/11109) (alamb) +- Fix overflow in pow [#11124](https://github.com/apache/datafusion/pull/11124) (LorrensP-2158466) +- Support to unparse Time scalar value to String [#11121](https://github.com/apache/datafusion/pull/11121) (goldmedal) +- Support to unparse `TimestampSecond` and `TimestampMicrosecond` to String [#11120](https://github.com/apache/datafusion/pull/11120) (goldmedal) +- Add standalone example for `OptimizerRule` [#11087](https://github.com/apache/datafusion/pull/11087) (alamb) +- Fix overflow in factorial [#11134](https://github.com/apache/datafusion/pull/11134) (LorrensP-2158466) +- Temporary Fix: Query error when grouping by case expressions [#11133](https://github.com/apache/datafusion/pull/11133) (jonahgao) +- Fix nullability of return value of array_agg [#11093](https://github.com/apache/datafusion/pull/11093) (eejbyfeldt) +- Support filter for List [#11091](https://github.com/apache/datafusion/pull/11091) (jayzhan211) +- [MINOR]: Fix some minor silent bugs [#11127](https://github.com/apache/datafusion/pull/11127) (mustafasrepo) +- Minor Fix for Logical and Physical Expr Conversions [#11142](https://github.com/apache/datafusion/pull/11142) (berkaysynnada) +- Support Date Parquet Data Page Statistics [#11135](https://github.com/apache/datafusion/pull/11135) (dharanad) +- fix flaky array query slt test [#11140](https://github.com/apache/datafusion/pull/11140) (leoyvens) +- Support Decimal and Decimal256 Parquet Data Page Statistics [#11138](https://github.com/apache/datafusion/pull/11138) (Lordworms) +- Implement comparisons on nested data types such that distinct/except would work [#11117](https://github.com/apache/datafusion/pull/11117) (rtyler) +- Minor: dont panic with bad arguments to round [#10899](https://github.com/apache/datafusion/pull/10899) (tmi) +- Minor: reduce replication for nested comparison [#11149](https://github.com/apache/datafusion/pull/11149) (alamb) +- [Minor]: Remove datafusion-functions-aggregate dependency from physical-expr crate [#11158](https://github.com/apache/datafusion/pull/11158) (mustafasrepo) +- adding config to control Varchar behavior [#11090](https://github.com/apache/datafusion/pull/11090) (Lordworms) +- minor: consolidate `gcd` related tests [#11164](https://github.com/apache/datafusion/pull/11164) (jonahgao) +- Minor: move batch spilling methods to `lib.rs` to make it reusable [#11154](https://github.com/apache/datafusion/pull/11154) (comphead) +- Move 
schema projection to where it's used in ListingTable [#11167](https://github.com/apache/datafusion/pull/11167) (adriangb) +- Make running in docker instruction be copy-pastable [#11148](https://github.com/apache/datafusion/pull/11148) (findepi) +- Rewrite `array @> array` and `array <@ array` in sql_expr_to_logical_expr [#11155](https://github.com/apache/datafusion/pull/11155) (jayzhan211) +- Minor: make some physical_optimizer rules public [#11171](https://github.com/apache/datafusion/pull/11171) (askalt) +- Remove pr_benchmarks.yml [#11165](https://github.com/apache/datafusion/pull/11165) (alamb) +- Optionally display schema in explain plan [#11177](https://github.com/apache/datafusion/pull/11177) (alamb) +- Minor: Add more support for ScalarValue::Float16 [#11156](https://github.com/apache/datafusion/pull/11156) (Lordworms) +- Minor: fix SQLOptions::with_allow_ddl comments [#11166](https://github.com/apache/datafusion/pull/11166) (alamb) +- Update sqllogictest requirement from 0.20.0 to 0.21.0 [#11189](https://github.com/apache/datafusion/pull/11189) (dependabot[bot]) +- Support Time Parquet Data Page Statistics [#11187](https://github.com/apache/datafusion/pull/11187) (dharanad) +- Adds support for Dictionary data type statistics from parquet data pages. [#11195](https://github.com/apache/datafusion/pull/11195) (efredine) +- [Minor]: Make sort_batch public [#11191](https://github.com/apache/datafusion/pull/11191) (mustafasrepo) +- Introduce user defined SQL planner API [#11180](https://github.com/apache/datafusion/pull/11180) (jayzhan211) +- Covert grouping to udaf [#11147](https://github.com/apache/datafusion/pull/11147) (Rachelint) +- Make statistics_from_parquet_meta a sync function [#11205](https://github.com/apache/datafusion/pull/11205) (adriangb) +- Allow user defined SQL planners to be registered [#11208](https://github.com/apache/datafusion/pull/11208) (samuelcolvin) +- Recursive `unnest` [#11062](https://github.com/apache/datafusion/pull/11062) (duongcongtoai) +- Document how to test examples in user guide, add some more coverage [#11178](https://github.com/apache/datafusion/pull/11178) (alamb) +- Minor: Move MemoryCatalog\*Provider into a module, improve comments [#11183](https://github.com/apache/datafusion/pull/11183) (alamb) +- Add standalone example of using the SQL frontend [#11088](https://github.com/apache/datafusion/pull/11088) (alamb) +- Add Optimizer Sanity Checker, improve sortedness equivalence properties [#11196](https://github.com/apache/datafusion/pull/11196) (mustafasrepo) +- Implement user defined planner for extract [#11215](https://github.com/apache/datafusion/pull/11215) (xinlifoobar) +- Move basic SQL query examples to user guide [#11217](https://github.com/apache/datafusion/pull/11217) (alamb) +- Support FixedSizedBinaryArray Parquet Data Page Statistics [#11200](https://github.com/apache/datafusion/pull/11200) (dharanad) +- Implement ScalarValue::Map [#11224](https://github.com/apache/datafusion/pull/11224) (goldmedal) +- Remove unmaintained python pre-commit configuration [#11255](https://github.com/apache/datafusion/pull/11255) (findepi) +- Enable `clone_on_ref_ptr` clippy lint on execution crate [#11239](https://github.com/apache/datafusion/pull/11239) (lewiszlw) +- Minor: Improve documentation about pushdown join predicates 
[#11209](https://github.com/apache/datafusion/pull/11209) (alamb) +- Minor: clean up data page statistics tests and fix bugs [#11236](https://github.com/apache/datafusion/pull/11236) (efredine) +- Replacing pattern matching through downcast with trait method [#11257](https://github.com/apache/datafusion/pull/11257) (edmondop) +- Update substrait requirement from 0.34.0 to 0.35.0 [#11206](https://github.com/apache/datafusion/pull/11206) (dependabot[bot]) +- Enhance short circuit handling in `CommonSubexprEliminate` [#11197](https://github.com/apache/datafusion/pull/11197) (peter-toth) +- Add bench for data page statistics parquet extraction [#10950](https://github.com/apache/datafusion/pull/10950) (marvinlanhenke) +- Register SQL planners in `SessionState` constructor [#11253](https://github.com/apache/datafusion/pull/11253) (dharanad) +- Support DuckDB style struct syntax [#11214](https://github.com/apache/datafusion/pull/11214) (jayzhan211) +- Enable `clone_on_ref_ptr` clippy lint on expr crate [#11238](https://github.com/apache/datafusion/pull/11238) (lewiszlw) +- Optimize PushDownFilter to avoid recreating schema columns [#11211](https://github.com/apache/datafusion/pull/11211) (alamb) +- Remove outdated `rewrite_expr.rs` example [#11085](https://github.com/apache/datafusion/pull/11085) (alamb) +- Implement TPCH substrait integration teset, support tpch_2 [#11234](https://github.com/apache/datafusion/pull/11234) (Lordworms) +- Enable `clone_on_ref_ptr` clippy lint on physical-expr crate [#11240](https://github.com/apache/datafusion/pull/11240) (lewiszlw) +- Add standalone `AnalyzerRule` example that implements row level access control [#11089](https://github.com/apache/datafusion/pull/11089) (alamb) +- Replace println! with assert! if possible in DataFusion examples [#11237](https://github.com/apache/datafusion/pull/11237) (Nishi46) +- minor: format `Expr::get_type()` [#11267](https://github.com/apache/datafusion/pull/11267) (jonahgao) +- Fix hash join for nested types [#11232](https://github.com/apache/datafusion/pull/11232) (eejbyfeldt) +- Infer count() aggregation is not null [#11256](https://github.com/apache/datafusion/pull/11256) (findepi) +- Remove unnecessary qualified names [#11292](https://github.com/apache/datafusion/pull/11292) (findepi) +- Fix running examples readme [#11225](https://github.com/apache/datafusion/pull/11225) (findepi) +- Minor: Add `ConstExpr::from` and use in physical optimizer [#11283](https://github.com/apache/datafusion/pull/11283) (alamb) +- Implement TPCH substrait integration teset, support tpch_3 [#11298](https://github.com/apache/datafusion/pull/11298) (Lordworms) +- Implement user defined planner for position [#11243](https://github.com/apache/datafusion/pull/11243) (xinlifoobar) +- Upgrade to arrow 52.1.0 (and fix clippy issues on main) [#11302](https://github.com/apache/datafusion/pull/11302) (alamb) +- AggregateExec: Take grouping sets into account for InputOrderMode [#11301](https://github.com/apache/datafusion/pull/11301) (thinkharderdev) +- Add user_defined_sql_planners(..) 
to FunctionRegistry [#11296](https://github.com/apache/datafusion/pull/11296) (Omega359) +- use safe cast in propagate_constraints [#11297](https://github.com/apache/datafusion/pull/11297) (Lordworms) +- Minor: Remove clone in optimizer [#11315](https://github.com/apache/datafusion/pull/11315) (jayzhan211) +- minor: Add `PhysicalSortExpr::new` [#11310](https://github.com/apache/datafusion/pull/11310) (andygrove) +- Fix data page statistics when all rows are null in a data page [#11295](https://github.com/apache/datafusion/pull/11295) (efredine) +- Made UserDefinedFunctionPlanner to uniform the usages [#11318](https://github.com/apache/datafusion/pull/11318) (xinlifoobar) +- Implement user defined planner for `create_struct` & `create_named_struct` [#11273](https://github.com/apache/datafusion/pull/11273) (dharanad) +- Improve stats convert performance for Binary/String/Boolean arrays [#11319](https://github.com/apache/datafusion/pull/11319) (Rachelint) +- Fix typos in datafusion-examples/datafusion-cli/docs [#11259](https://github.com/apache/datafusion/pull/11259) (lewiszlw) +- Minor: Fix Failing TPC-DS Test [#11331](https://github.com/apache/datafusion/pull/11331) (berkaysynnada) +- HashJoin can preserve the right ordering when join type is Right [#11276](https://github.com/apache/datafusion/pull/11276) (berkaysynnada) +- Update substrait requirement from 0.35.0 to 0.36.0 [#11328](https://github.com/apache/datafusion/pull/11328) (dependabot[bot]) +- Support to uparse logical plans with timestamp cast to string [#11326](https://github.com/apache/datafusion/pull/11326) (sgrebnov) +- Implement user defined planner for sql_substring_to_expr [#11327](https://github.com/apache/datafusion/pull/11327) (xinlifoobar) +- Improve volatile expression handling in `CommonSubexprEliminate` [#11265](https://github.com/apache/datafusion/pull/11265) (peter-toth) +- Support `IS NULL` and `IS NOT NULL` on Unions [#11321](https://github.com/apache/datafusion/pull/11321) (samuelcolvin) +- Implement TPCH substrait integration test, support tpch_4 and tpch_5 [#11311](https://github.com/apache/datafusion/pull/11311) (Lordworms) +- Enable `clone_on_ref_ptr` clippy lint on physical-plan crate [#11241](https://github.com/apache/datafusion/pull/11241) (lewiszlw) +- Remove any aliases in `Filter::try_new` rather than erroring [#11307](https://github.com/apache/datafusion/pull/11307) (samuelcolvin) +- Improve `DataFrame` Users Guide [#11324](https://github.com/apache/datafusion/pull/11324) (alamb) +- chore: Rename UserDefinedSQLPlanner to ExprPlanner [#11338](https://github.com/apache/datafusion/pull/11338) (andygrove) +- Revert "remove `derive(Copy)` from `Operator` (#11132)" [#11341](https://github.com/apache/datafusion/pull/11341) (alamb) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 41 Andrew Lamb + 17 Jay Zhan + 12 Lordworms + 12 张林伟 + 10 Arttu + 9 Jax Liu + 9 Lorrens Pantelis + 8 Piotr Findeisen + 7 Dharan Aditya + 7 Jonah Gao + 7 Xin Li + 6 Andy Grove + 6 Marvin Lanhenke + 6 Trent Hauck + 5 Alex Huang + 5 Eric Fredine + 5 Mustafa Akur + 5 Oleks V + 5 dependabot[bot] + 4 Adrian Garcia Badaracco + 4 Berkay Şahin + 4 Kevin Su + 4 Peter Toth + 4 Ruihang Xia + 4 Samuel Colvin + 3 Bruce Ritchie + 3 Edmondo Porcu + 3 Emil Ejbyfeldt + 3 Heran Lin + 3 Leonardo Yvens + 3 jcsherin + 3 tmi + 2 Duong Cong Toai + 2 Liang-Chi Hsieh + 2 Max Burke + 2 kamille + 1 Albert Skalt + 1 Andrey Koshchiy + 1 Benjamin Bannier + 1 Bo Lin + 1 Chojan Shang + 1 Chunchun Ye + 1 Dan Harris + 1 Devin D'Angelo + 1 Eduardo Vega + 1 Georgi Krastev + 1 Hector Veiga + 1 Jeffrey Smith II + 1 Kirill Khramkov + 1 Matt Nawara + 1 Mohamed Abdeen + 1 Nga Tran + 1 Nishi + 1 Phillip LeBlanc + 1 R. Tyler Croy + 1 RT_Enzyme + 1 Sava Vranešević + 1 Sergei Grebnov + 1 Weston Pace + 1 Xiangpeng Hao + 1 advancedxy + 1 c8ef + 1 gstvg + 1 yfu +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 303caef577008..579088f991ef2 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -64,7 +64,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | NULL | Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 39.0.0 | Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 40.0.0 | Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | NULL | Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. 
If NULL, uses default parquet writer setting | From fa0191772e87e04da2598aedb7fe11dd49f88f88 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Tue, 9 Jul 2024 22:12:48 +0900 Subject: [PATCH 002/357] Support `NULL` literals in where clause (#11266) * Try fix where clause incorrectly reject NULL literal * check null in filter --- datafusion/expr/src/logical_plan/plan.rs | 3 +- datafusion/physical-plan/src/filter.rs | 39 +++++++++++++++++---- datafusion/sqllogictest/test_files/misc.slt | 14 ++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index bda03fb7087a9..998b5bdcb60c8 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2123,7 +2123,8 @@ impl Filter { // construction (such as with correlated subqueries) so we make a best effort here and // ignore errors resolving the expression against the schema. if let Ok(predicate_type) = predicate.get_type(input.schema()) { - if predicate_type != DataType::Boolean { + // Interpret NULL as a missing boolean value. + if predicate_type != DataType::Boolean && predicate_type != DataType::Null { return plan_err!( "Cannot create filter with non-boolean predicate '{predicate}' returning {predicate_type}" ); diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 96ec6c0cf34da..84afc227578f8 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -31,13 +31,13 @@ use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, DisplayFormatType, ExecutionPlan, }; - use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::cast::as_boolean_array; +use arrow_array::{Array, BooleanArray}; +use datafusion_common::cast::{as_boolean_array, as_null_array}; use datafusion_common::stats::Precision; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; @@ -84,6 +84,19 @@ impl FilterExec { cache, }) } + DataType::Null => { + let default_selectivity = 0; + let cache = + Self::compute_properties(&input, &predicate, default_selectivity)?; + + Ok(Self { + predicate, + input: input.clone(), + metrics: ExecutionPlanMetricsSet::new(), + default_selectivity, + cache, + }) + } other => { plan_err!("Filter predicate must return boolean values, not {other:?}") } @@ -355,9 +368,23 @@ pub(crate) fn batch_filter( .evaluate(batch) .and_then(|v| v.into_array(batch.num_rows())) .and_then(|array| { - Ok(as_boolean_array(&array)?) - // apply filter array to record batch - .and_then(|filter_array| Ok(filter_record_batch(batch, filter_array)?)) + let filter_array = match as_boolean_array(&array) { + Ok(boolean_array) => { + Ok(boolean_array.to_owned()) + }, + Err(_) => { + let Ok(null_array) = as_null_array(&array) else { + return internal_err!("Cannot create filter_array from non-boolean predicates, unable to continute"); + }; + + // if the predicate is null, then the result is also null + Ok::(BooleanArray::new_null( + null_array.len(), + )) + } + }?; + + Ok(filter_record_batch(batch, &filter_array)?) 
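+            // Note: when filter_array is all null (i.e. the predicate is a NULL literal), filter_record_batch selects no rows, matching SQL's treatment of NULL as not true.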
}) } diff --git a/datafusion/sqllogictest/test_files/misc.slt b/datafusion/sqllogictest/test_files/misc.slt index 848cdc9439147..66606df834808 100644 --- a/datafusion/sqllogictest/test_files/misc.slt +++ b/datafusion/sqllogictest/test_files/misc.slt @@ -24,3 +24,17 @@ query TT? select 'foo', '', NULL ---- foo (empty) NULL + +# Where clause accept NULL literal +query I +select 1 where NULL +---- + +query I +select 1 where NULL and 1 = 1 +---- + +query I +select 1 where NULL or 1 = 1 +---- +1 \ No newline at end of file From e65c3e919855c9977cf4d80c0630ee26b7fd03ee Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 09:13:11 -0400 Subject: [PATCH 003/357] Minor: Add link to blog to main DataFusion website (#11356) --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 8677b63c916a8..d491df04f7fe7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,6 +69,7 @@ See the `developer’s guide`_ for contributing and `communication`_ for getting GitHub and Issue Tracker crates.io API Docs + Blog Code of conduct Download From 3d1792fb9f73c6df0ca71e06c89a7bb6e273a740 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Tue, 9 Jul 2024 14:05:59 -0700 Subject: [PATCH 004/357] Implement TPCH substrait integration test, support tpch_6, tpch_10, tpch_11 (#11349) --- .../tests/cases/consumer_integration.rs | 131 ++ .../tpch_substrait_plans/query_10.json | 1257 +++++++++++++++++ .../tpch_substrait_plans/query_11.json | 1059 ++++++++++++++ .../tpch_substrait_plans/query_6.json | 585 ++++++++ 4 files changed, 3032 insertions(+) create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_10.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_11.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_6.json diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 5d565c0378528..6133c239873b2 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -124,6 +124,56 @@ mod tests { Ok(ctx) } + async fn create_context_tpch6() -> Result { + let ctx = SessionContext::new(); + + let registrations = + vec![("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/lineitem.csv")]; + + for (table_name, file_path) in registrations { + register_csv(&ctx, table_name, file_path).await?; + } + + Ok(ctx) + } + // missing context for query 7,8,9 + + async fn create_context_tpch10() -> Result { + let ctx = SessionContext::new(); + + let registrations = vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), + ]; + + for (table_name, file_path) in registrations { + register_csv(&ctx, table_name, file_path).await?; + } + + Ok(ctx) + } + + async fn create_context_tpch11() -> Result { + let ctx = SessionContext::new(); + + let registrations = vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_4", 
"tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/nation.csv"), + ]; + + for (table_name, file_path) in registrations { + register_csv(&ctx, table_name, file_path).await?; + } + + Ok(ctx) + } + #[tokio::test] async fn tpch_test_1() -> Result<()> { let ctx = create_context_tpch1().await?; @@ -266,4 +316,85 @@ mod tests { \n TableScan: REGION projection=[r_regionkey, r_name, r_comment]"); Ok(()) } + + #[tokio::test] + async fn tpch_test_6() -> Result<()> { + let ctx = create_context_tpch6().await?; + let path = "tests/testdata/tpch_substrait_plans/query_6.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_0.l_extendedprice * FILENAME_PLACEHOLDER_0.l_discount) AS REVENUE]]\ + \n Projection: FILENAME_PLACEHOLDER_0.l_extendedprice * FILENAME_PLACEHOLDER_0.l_discount\ + \n Filter: FILENAME_PLACEHOLDER_0.l_shipdate >= CAST(Utf8(\"1994-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_0.l_shipdate < CAST(Utf8(\"1995-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_0.l_discount >= Decimal128(Some(5),3,2) AND FILENAME_PLACEHOLDER_0.l_discount <= Decimal128(Some(7),3,2) AND FILENAME_PLACEHOLDER_0.l_quantity < CAST(Int32(24) AS Decimal128(19, 0))\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]"); + Ok(()) + } + + // TODO: missing plan 7, 8, 9 + #[tokio::test] + async fn tpch_test_10() -> Result<()> { + let ctx = create_context_tpch10().await?; + let path = "tests/testdata/tpch_substrait_plans/query_10.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.c_custkey AS C_CUSTKEY, FILENAME_PLACEHOLDER_0.c_name AS C_NAME, sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) AS REVENUE, FILENAME_PLACEHOLDER_0.c_acctbal AS C_ACCTBAL, FILENAME_PLACEHOLDER_3.n_name AS N_NAME, FILENAME_PLACEHOLDER_0.c_address AS C_ADDRESS, FILENAME_PLACEHOLDER_0.c_phone AS C_PHONE, FILENAME_PLACEHOLDER_0.c_comment AS C_COMMENT\ + \n Limit: skip=0, fetch=20\ + \n Sort: sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) DESC NULLS FIRST\ + \n Projection: FILENAME_PLACEHOLDER_0.c_custkey, FILENAME_PLACEHOLDER_0.c_name, sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount), FILENAME_PLACEHOLDER_0.c_acctbal, FILENAME_PLACEHOLDER_3.n_name, FILENAME_PLACEHOLDER_0.c_address, FILENAME_PLACEHOLDER_0.c_phone, FILENAME_PLACEHOLDER_0.c_comment\n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.c_custkey, FILENAME_PLACEHOLDER_0.c_name, FILENAME_PLACEHOLDER_0.c_acctbal, FILENAME_PLACEHOLDER_0.c_phone, FILENAME_PLACEHOLDER_3.n_name, FILENAME_PLACEHOLDER_0.c_address, FILENAME_PLACEHOLDER_0.c_comment]], aggr=[[sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount)]]\ + \n Projection: FILENAME_PLACEHOLDER_0.c_custkey, 
FILENAME_PLACEHOLDER_0.c_name, FILENAME_PLACEHOLDER_0.c_acctbal, FILENAME_PLACEHOLDER_0.c_phone, FILENAME_PLACEHOLDER_3.n_name, FILENAME_PLACEHOLDER_0.c_address, FILENAME_PLACEHOLDER_0.c_comment, FILENAME_PLACEHOLDER_2.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_2.l_discount)\ + \n Filter: FILENAME_PLACEHOLDER_0.c_custkey = FILENAME_PLACEHOLDER_1.o_custkey AND FILENAME_PLACEHOLDER_2.l_orderkey = FILENAME_PLACEHOLDER_1.o_orderkey AND FILENAME_PLACEHOLDER_1.o_orderdate >= CAST(Utf8(\"1993-10-01\") AS Date32) AND FILENAME_PLACEHOLDER_1.o_orderdate < CAST(Utf8(\"1994-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_2.l_returnflag = Utf8(\"R\") AND FILENAME_PLACEHOLDER_0.c_nationkey = FILENAME_PLACEHOLDER_3.n_nationkey\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_3 projection=[n_nationkey, n_name, n_regionkey, n_comment]"); + Ok(()) + } + + #[tokio::test] + async fn tpch_test_11() -> Result<()> { + let ctx = create_context_tpch11().await?; + let path = "tests/testdata/tpch_substrait_plans/query_11.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.ps_partkey AS PS_PARTKEY, sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty) AS value\ + \n Sort: sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty) DESC NULLS FIRST\ + \n Filter: sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty) > ()\ + \n Subquery:\ + \n Projection: sum(FILENAME_PLACEHOLDER_3.ps_supplycost * FILENAME_PLACEHOLDER_3.ps_availqty) * Decimal128(Some(1000000),11,10)\ + \n Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_3.ps_supplycost * FILENAME_PLACEHOLDER_3.ps_availqty)]]\ + \n Projection: FILENAME_PLACEHOLDER_3.ps_supplycost * CAST(FILENAME_PLACEHOLDER_3.ps_availqty AS Decimal128(19, 0))\ + \n Filter: FILENAME_PLACEHOLDER_3.ps_suppkey = FILENAME_PLACEHOLDER_4.s_suppkey AND FILENAME_PLACEHOLDER_4.s_nationkey = FILENAME_PLACEHOLDER_5.n_nationkey AND FILENAME_PLACEHOLDER_5.n_name = CAST(Utf8(\"JAPAN\") AS Utf8)\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_3 projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_4 projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_5 projection=[n_nationkey, n_name, n_regionkey, n_comment]\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.ps_partkey]], aggr=[[sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty)]]\ + \n Projection: 
FILENAME_PLACEHOLDER_0.ps_partkey, FILENAME_PLACEHOLDER_0.ps_supplycost * CAST(FILENAME_PLACEHOLDER_0.ps_availqty AS Decimal128(19, 0))\ + \n Filter: FILENAME_PLACEHOLDER_0.ps_suppkey = FILENAME_PLACEHOLDER_1.s_suppkey AND FILENAME_PLACEHOLDER_1.s_nationkey = FILENAME_PLACEHOLDER_2.n_nationkey AND FILENAME_PLACEHOLDER_2.n_name = CAST(Utf8(\"JAPAN\") AS Utf8)\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[n_nationkey, n_name, n_regionkey, n_comment]"); + Ok(()) + } } diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_10.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_10.json new file mode 100644 index 0000000000000..04e13b1edc279 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_10.json @@ -0,0 +1,1257 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gte:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "multiply:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "subtract:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "sum:opt_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "C_CUSTKEY", + "C_NAME", + "C_ADDRESS", + "C_NATIONKEY", + "C_PHONE", + "C_ACCTBAL", + "C_MKTSEGMENT", + "C_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "O_ORDERKEY", + "O_CUSTKEY", + "O_ORDERSTATUS", + "O_TOTALPRICE", + "O_ORDERDATE", + "O_ORDERPRIORITY", + "O_CLERK", + "O_SHIPPRIORITY", + "O_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 
19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "N_NATIONKEY", + "N_NAME", + "N_REGIONKEY", + "N_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_3", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + 
"rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1993-10-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1994-01-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 25 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "R", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 34 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + }, + { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + 
"field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + } + ] + } + }, + "offset": "0", + "count": "20" + } + }, + "names": [ + "C_CUSTKEY", + "C_NAME", + "REVENUE", + "C_ACCTBAL", + "N_NAME", + "C_ADDRESS", + 
"C_PHONE", + "C_COMMENT" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_11.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_11.json new file mode 100644 index 0000000000000..916bc6f71c2cb --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_11.json @@ -0,0 +1,1059 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "multiply:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "sum:opt_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "gt:any1_any1" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + 
"length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "JAPAN", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }] + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "condition": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_3", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_4", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_5", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "JAPAN", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }] + } + } + }] + } + } + } + }, + 
"expressions": [{ + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 10, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "QEIPAAAAAAAAAAAAAAAAAA==", + "precision": 11, + "scale": 10 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }] + } + }] + } + } + } + } + } + }] + } + } + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "names": ["PS_PARTKEY", "value"] + } + }], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_6.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_6.json new file mode 100644 index 0000000000000..18fb9781da551 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_6.json @@ -0,0 +1,585 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_datetime.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "gte:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "lt:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "gte:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "lte:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "lt:any1_any1" + } + }, 
+ { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "multiply:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "sum:opt_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "aggregate": { + "common": { + "direct": {} + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 16 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, 
+ "input": { + "literal": { + "fixedChar": "1994-01-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1995-01-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "decimal": { + "value": "BQAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "decimal": { + "value": "BwAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 24, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [] + } + ], + "measures": [ + { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + 
"invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + }, + "names": [ + "REVENUE" + ] + } + } + ], + "expectedTypeUrls": [] +} \ No newline at end of file From 6cca0f7f3725406aef4deb1ff1bbe299867ce82c Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 10 Jul 2024 05:08:22 +0800 Subject: [PATCH 005/357] Fix bug when pushing projection under joins (#11333) * Fix bug in `ProjectionPushdown` * add order by * Fix join on --- .../physical_optimizer/projection_pushdown.rs | 50 ++++++++++------ datafusion/sqllogictest/test_files/join.slt | 58 +++++++++++++++++++ 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 70524dfcea7d2..3c2be59f75040 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -46,7 +46,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; -use datafusion_common::{DataFusionError, JoinSide}; +use datafusion_common::{internal_err, JoinSide}; use datafusion_physical_expr::expressions::{Column, Literal}; use datafusion_physical_expr::{ utils::collect_columns, Partitioning, PhysicalExpr, PhysicalExprRef, @@ -640,6 +640,7 @@ fn try_pushdown_through_hash_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], hash_join.on(), + hash_join.left().schema().fields().len(), ) else { return Ok(None); }; @@ -649,8 +650,7 @@ fn try_pushdown_through_hash_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], filter, - hash_join.left(), - hash_join.right(), + hash_join.left().schema().fields().len(), ) { Some(updated_filter) => Some(updated_filter), None => return Ok(None), @@ -750,8 +750,7 @@ fn try_swapping_with_nested_loop_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], filter, - nl_join.left(), - nl_join.right(), + nl_join.left().schema().fields().len(), ) { Some(updated_filter) => Some(updated_filter), None => return Ok(None), @@ -806,6 +805,7 @@ fn try_swapping_with_sort_merge_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], sm_join.on(), + sm_join.left().schema().fields().len(), ) else { return Ok(None); }; @@ -859,6 +859,7 @@ fn try_swapping_with_sym_hash_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], sym_join.on(), + sym_join.left().schema().fields().len(), ) else { return Ok(None); }; @@ -868,8 +869,7 @@ fn try_swapping_with_sym_hash_join( &projection_as_columns[0..=far_right_left_col_ind as _], &projection_as_columns[far_left_right_col_ind as _..], filter, - sym_join.left(), - sym_join.right(), + sym_join.left().schema().fields().len(), ) { Some(updated_filter) => Some(updated_filter), None => return Ok(None), @@ -1090,6 +1090,7 @@ fn update_join_on( proj_left_exprs: &[(Column, String)], proj_right_exprs: &[(Column, String)], hash_join_on: &[(PhysicalExprRef, PhysicalExprRef)], + left_field_size: usize, ) -> Option> { // TODO: Clippy wants the "map" call removed, but 
doing so generates // a compilation error. Remove the clippy directive once this @@ -1100,8 +1101,9 @@ fn update_join_on( .map(|(left, right)| (left, right)) .unzip(); - let new_left_columns = new_columns_for_join_on(&left_idx, proj_left_exprs); - let new_right_columns = new_columns_for_join_on(&right_idx, proj_right_exprs); + let new_left_columns = new_columns_for_join_on(&left_idx, proj_left_exprs, 0); + let new_right_columns = + new_columns_for_join_on(&right_idx, proj_right_exprs, left_field_size); match (new_left_columns, new_right_columns) { (Some(left), Some(right)) => Some(left.into_iter().zip(right).collect()), @@ -1112,9 +1114,14 @@ fn update_join_on( /// This function generates a new set of columns to be used in a hash join /// operation based on a set of equi-join conditions (`hash_join_on`) and a /// list of projection expressions (`projection_exprs`). +/// +/// Notes: Column indices in the projection expressions are based on the join schema, +/// whereas the join on expressions are based on the join child schema. `column_index_offset` +/// represents the offset between them. fn new_columns_for_join_on( hash_join_on: &[&PhysicalExprRef], projection_exprs: &[(Column, String)], + column_index_offset: usize, ) -> Option> { let new_columns = hash_join_on .iter() @@ -1130,6 +1137,8 @@ fn new_columns_for_join_on( .enumerate() .find(|(_, (proj_column, _))| { column.name() == proj_column.name() + && column.index() + column_index_offset + == proj_column.index() }) .map(|(index, (_, alias))| Column::new(alias, index)); if let Some(new_column) = new_column { @@ -1138,10 +1147,10 @@ fn new_columns_for_join_on( // If the column is not found in the projection expressions, // it means that the column is not projected. In this case, // we cannot push the projection down. - Err(DataFusionError::Internal(format!( + internal_err!( "Column {:?} not found in projection expressions", column - ))) + ) } } else { Ok(Transformed::no(expr)) @@ -1160,21 +1169,20 @@ fn update_join_filter( projection_left_exprs: &[(Column, String)], projection_right_exprs: &[(Column, String)], join_filter: &JoinFilter, - join_left: &Arc, - join_right: &Arc, + left_field_size: usize, ) -> Option { let mut new_left_indices = new_indices_for_join_filter( join_filter, JoinSide::Left, projection_left_exprs, - join_left.schema(), + 0, ) .into_iter(); let mut new_right_indices = new_indices_for_join_filter( join_filter, JoinSide::Right, projection_right_exprs, - join_right.schema(), + left_field_size, ) .into_iter(); @@ -1204,20 +1212,24 @@ fn update_join_filter( /// This function determines and returns a vector of indices representing the /// positions of columns in `projection_exprs` that are involved in `join_filter`, /// and correspond to a particular side (`join_side`) of the join operation. +/// +/// Notes: Column indices in the projection expressions are based on the join schema, +/// whereas the join filter is based on the join child schema. `column_index_offset` +/// represents the offset between them. 
fn new_indices_for_join_filter( join_filter: &JoinFilter, join_side: JoinSide, projection_exprs: &[(Column, String)], - join_child_schema: SchemaRef, + column_index_offset: usize, ) -> Vec { join_filter .column_indices() .iter() .filter(|col_idx| col_idx.side == join_side) .filter_map(|col_idx| { - projection_exprs.iter().position(|(col, _)| { - col.name() == join_child_schema.fields()[col_idx.index].name() - }) + projection_exprs + .iter() + .position(|(col, _)| col_idx.index + column_index_offset == col.index()) }) .collect() } diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 3c89109145d70..12cb8b3985c76 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -986,3 +986,61 @@ DROP TABLE employees statement ok DROP TABLE department + + +# Test issue: https://github.com/apache/datafusion/issues/11269 +statement ok +CREATE TABLE t1 (v0 BIGINT) AS VALUES (-503661263); + +statement ok +CREATE TABLE t2 (v0 DOUBLE) AS VALUES (-1.663563947387); + +statement ok +CREATE TABLE t3 (v0 DOUBLE) AS VALUES (0.05112015193508901); + +query RR +SELECT t3.v0, t2.v0 FROM t1,t2,t3 WHERE t3.v0 >= t1.v0; +---- +0.051120151935 -1.663563947387 + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; + +statement ok +DROP TABLE t3; + + +# Test issue: https://github.com/apache/datafusion/issues/11275 +statement ok +CREATE TABLE t0 (v1 BOOLEAN) AS VALUES (false), (null); + +statement ok +CREATE TABLE t1 (v1 BOOLEAN) AS VALUES (false), (null), (false); + +statement ok +CREATE TABLE t2 (v1 BOOLEAN) AS VALUES (false), (true); + +query BB +SELECT t2.v1, t1.v1 FROM t0, t1, t2 WHERE t2.v1 IS DISTINCT FROM t0.v1 ORDER BY 1,2; +---- +false false +false false +false NULL +true false +true false +true false +true false +true NULL +true NULL + +statement ok +DROP TABLE t0; + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; From 7f25d9dd63918ecfeecaa5810d2a5c4fc9155c5d Mon Sep 17 00:00:00 2001 From: Oleks V Date: Tue, 9 Jul 2024 14:09:57 -0700 Subject: [PATCH 006/357] Minor: some cosmetics in `filter.rs`, fix clippy due to logical conflict (#11368) * Minor: some cosmetics in `filter.rs` * Minor: some cosmetics in `filter.rs` --- datafusion/physical-plan/src/filter.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 84afc227578f8..c5ba3992d3b41 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -15,9 +15,6 @@ // specific language governing permissions and limitations // under the License. -//! FilterExec evaluates a boolean predicate against all input batches to determine which rows to -//! include in its output batches. - use std::any::Any; use std::pin::Pin; use std::sync::Arc; @@ -60,7 +57,7 @@ pub struct FilterExec { input: Arc, /// Execution metrics metrics: ExecutionPlanMetricsSet, - /// Selectivity for statistics. 0 = no rows, 100 all rows + /// Selectivity for statistics. 
0 = no rows, 100 = all rows default_selectivity: u8, cache: PlanProperties, } @@ -91,14 +88,14 @@ impl FilterExec { Ok(Self { predicate, - input: input.clone(), + input: Arc::clone(&input), metrics: ExecutionPlanMetricsSet::new(), default_selectivity, cache, }) } other => { - plan_err!("Filter predicate must return boolean values, not {other:?}") + plan_err!("Filter predicate must return BOOLEAN values, got {other:?}") } } } @@ -108,7 +105,9 @@ impl FilterExec { default_selectivity: u8, ) -> Result { if default_selectivity > 100 { - return plan_err!("Default filter selectivity needs to be less than 100"); + return plan_err!( + "Default filter selectivity value needs to be less than or equal to 100" + ); } self.default_selectivity = default_selectivity; Ok(self) @@ -369,12 +368,12 @@ pub(crate) fn batch_filter( .and_then(|v| v.into_array(batch.num_rows())) .and_then(|array| { let filter_array = match as_boolean_array(&array) { - Ok(boolean_array) => { - Ok(boolean_array.to_owned()) - }, + Ok(boolean_array) => Ok(boolean_array.to_owned()), Err(_) => { let Ok(null_array) = as_null_array(&array) else { - return internal_err!("Cannot create filter_array from non-boolean predicates, unable to continute"); + return internal_err!( + "Cannot create filter_array from non-boolean predicates" + ); }; // if the predicate is null, then the result is also null From 9df393ea5539d5c83f8c16b028f44468727e3bee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:11:48 -0400 Subject: [PATCH 007/357] Update prost-derive requirement from 0.12 to 0.13 (#11355) Updates the requirements on [prost-derive](https://github.com/tokio-rs/prost) to permit the latest version. - [Release notes](https://github.com/tokio-rs/prost/releases) - [Changelog](https://github.com/tokio-rs/prost/blob/master/CHANGELOG.md) - [Commits](https://github.com/tokio-rs/prost/compare/v0.12.0...v0.13.0) --- updated-dependencies: - dependency-name: prost-derive dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- datafusion-examples/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 52e3a5525717d..626c365af21cb 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -73,7 +73,7 @@ mimalloc = { version = "0.1", default-features = false } num_cpus = { workspace = true } object_store = { workspace = true, features = ["aws", "http"] } prost = { version = "0.12", default-features = false } -prost-derive = { version = "0.12", default-features = false } +prost-derive = { version = "0.13", default-features = false } serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } tempfile = { workspace = true } From c018c74ae1ca2339bd530e5e6724e45a16e3900b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 17:44:39 -0400 Subject: [PATCH 008/357] Minor: update dashmap (#11335) --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 43 +++++++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f87205f0d0671..6dd434abc87c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,7 @@ bigdecimal = "=0.4.1" bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" -dashmap = "5.5.0" +dashmap = "6.0.1" datafusion = { path = "datafusion/core", version = "40.0.0", default-features = false } datafusion-common = { path = "datafusion/common", version = "40.0.0", default-features = false } datafusion-common-runtime = { path = "datafusion/common-runtime", version = "40.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 42ec5922a73fe..8af42cb43932e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -443,7 +443,7 @@ dependencies = [ "fastrand 1.9.0", "hex", "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.30", "ring 0.16.20", "time", "tokio", @@ -609,7 +609,7 @@ dependencies = [ "fastrand 1.9.0", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.29", + "hyper 0.14.30", "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", @@ -631,7 +631,7 @@ dependencies = [ "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.29", + "hyper 0.14.30", "once_cell", "percent-encoding", "pin-project-lite", @@ -875,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.106" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" dependencies = [ "jobserver", "libc", @@ -1055,6 +1055,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crunchy" version = "0.2.2" @@ -1110,11 +1116,12 @@ checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" [[package]] name = "dashmap" -version = "5.5.3" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" dependencies = [ "cfg-if", + 
"crossbeam-utils", "hashbrown 0.14.5", "lock_api", "once_cell", @@ -1941,9 +1948,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.29" +version = "0.14.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" dependencies = [ "bytes", "futures-channel", @@ -1965,9 +1972,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4fe55fb7a772d59a5ff1dfbff4fe0258d19b89fec4b233e75d35d5d2316badc" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", @@ -1990,7 +1997,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" dependencies = [ "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.30", "log", "rustls 0.20.9", "rustls-native-certs 0.6.3", @@ -2006,7 +2013,7 @@ checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.4.0", + "hyper 1.4.1", "hyper-util", "rustls 0.23.11", "rustls-native-certs 0.7.1", @@ -2027,7 +2034,7 @@ dependencies = [ "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper 1.4.0", + "hyper 1.4.1", "pin-project-lite", "socket2", "tokio", @@ -2502,7 +2509,7 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 1.4.0", + "hyper 1.4.1", "itertools", "md-5", "parking_lot", @@ -2976,7 +2983,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.4.0", + "hyper 1.4.1", "hyper-rustls 0.27.2", "hyper-util", "ipnet", @@ -3908,9 +3915,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", "serde", From 1e0c06e14ae821ac6aa344f8acb638431a898ae8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 17:45:34 -0400 Subject: [PATCH 009/357] Improve and test dataframe API examples in docs (#11290) * Improve and test dataframe API examples in docs * Update introduction with pointer to user guide * Make example consistent * Make read_csv comment consistent * clarifications * prettier + tweaks * Update docs/source/library-user-guide/using-the-dataframe-api.md Co-authored-by: Eric Fredine * Update docs/source/library-user-guide/using-the-dataframe-api.md Co-authored-by: Eric Fredine --------- Co-authored-by: Eric Fredine --- datafusion-examples/README.md | 1 + datafusion/core/src/lib.rs | 8 +- .../using-the-dataframe-api.md | 302 +++++++++++++----- .../library-user-guide/using-the-sql-api.md | 17 +- 4 files changed, 239 insertions(+), 89 deletions(-) diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 90469e6715a6a..2696f74775cf3 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -55,6 +55,7 @@ cargo run --example dataframe - [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension 
codecs for serialization / deserialization - [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file - [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider) +- [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 - [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame against a local parquet file - [`dataframe_in_memory.rs`](examples/dataframe_in_memory.rs): Run a query using a DataFrame against data in memory diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 956e9f7246a36..f5805bc069825 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -641,5 +641,11 @@ doc_comment::doctest!( #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/library-user-guide/using-the-sql-api.md", - library_user_guide_example_usage + library_user_guide_sql_api +); + +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/library-user-guide/using-the-dataframe-api.md", + library_user_guide_dataframe_api ); diff --git a/docs/source/library-user-guide/using-the-dataframe-api.md b/docs/source/library-user-guide/using-the-dataframe-api.md index c4f4ecd4f1370..9e7774cbb944c 100644 --- a/docs/source/library-user-guide/using-the-dataframe-api.md +++ b/docs/source/library-user-guide/using-the-dataframe-api.md @@ -19,129 +19,267 @@ # Using the DataFrame API -## What is a DataFrame +The [Users Guide] introduces the [`DataFrame`] API and this section describes +that API in more depth. -`DataFrame` in `DataFrame` is modeled after the Pandas DataFrame interface, and is a thin wrapper over LogicalPlan that adds functionality for building and executing those plans. +## What is a DataFrame? -```rust -pub struct DataFrame { - session_state: SessionState, - plan: LogicalPlan, -} -``` - -You can build up `DataFrame`s using its methods, similarly to building `LogicalPlan`s using `LogicalPlanBuilder`: - -```rust -let df = ctx.table("users").await?; +As described in the [Users Guide], DataFusion [`DataFrame`]s are modeled after +the [Pandas DataFrame] interface, and are implemented as thin wrapper over a +[`LogicalPlan`] that adds functionality for building and executing those plans. -// Create a new DataFrame sorted by `id`, `bank_account` -let new_df = df.select(vec![col("id"), col("bank_account")])? - .sort(vec![col("id")])?; - -// Build the same plan using the LogicalPlanBuilder -let plan = LogicalPlanBuilder::from(&df.to_logical_plan()) - .project(vec![col("id"), col("bank_account")])? - .sort(vec![col("id")])? - .build()?; -``` - -You can use `collect` or `execute_stream` to execute the query. +The simplest possible dataframe is one that scans a table and that table can be +in a file or in memory. ## How to generate a DataFrame -You can directly use the `DataFrame` API or generate a `DataFrame` from a SQL query. - -For example, to use `sql` to construct `DataFrame`: +You can construct [`DataFrame`]s programmatically using the API, similarly to +other DataFrame APIs. 
For example, you can read an in memory `RecordBatch` into +a `DataFrame`: ```rust -let ctx = SessionContext::new(); -// Register the in-memory table containing the data -ctx.register_table("users", Arc::new(create_memtable()?))?; -let dataframe = ctx.sql("SELECT * FROM users;").await?; +use std::sync::Arc; +use datafusion::prelude::*; +use datafusion::arrow::array::{ArrayRef, Int32Array}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::Result; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + // Register an in-memory table containing the following data + // id | bank_account + // ---|------------- + // 1 | 9000 + // 2 | 8000 + // 3 | 7000 + let data = RecordBatch::try_from_iter(vec![ + ("id", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef), + ("bank_account", Arc::new(Int32Array::from(vec![9000, 8000, 7000]))), + ])?; + // Create a DataFrame that scans the user table, and finds + // all users with a bank account at least 8000 + // and sorts the results by bank account in descending order + let dataframe = ctx + .read_batch(data)? + .filter(col("bank_account").gt_eq(lit(8000)))? // bank_account >= 8000 + .sort(vec![col("bank_account").sort(false, true)])?; // ORDER BY bank_account DESC + + Ok(()) +} ``` -To construct `DataFrame` using the API: +You can _also_ generate a `DataFrame` from a SQL query and use the DataFrame's APIs +to manipulate the output of the query. ```rust -let ctx = SessionContext::new(); -// Register the in-memory table containing the data -ctx.register_table("users", Arc::new(create_memtable()?))?; -let dataframe = ctx - .table("users") - .filter(col("a").lt_eq(col("b")))? - .sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?; +use std::sync::Arc; +use datafusion::prelude::*; +use datafusion::assert_batches_eq; +use datafusion::arrow::array::{ArrayRef, Int32Array}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::Result; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + // Register the same in-memory table as the previous example + let data = RecordBatch::try_from_iter(vec![ + ("id", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef), + ("bank_account", Arc::new(Int32Array::from(vec![9000, 8000, 7000]))), + ])?; + ctx.register_batch("users", data)?; + // Create a DataFrame using SQL + let dataframe = ctx.sql("SELECT * FROM users;") + .await? + // Note we can filter the output of the query using the DataFrame API + .filter(col("bank_account").gt_eq(lit(8000)))?; // bank_account >= 8000 + + let results = &dataframe.collect().await?; + + // use the `assert_batches_eq` macro to show the output + assert_batches_eq!( + vec![ + "+----+--------------+", + "| id | bank_account |", + "+----+--------------+", + "| 1 | 9000 |", + "| 2 | 8000 |", + "+----+--------------+", + ], + &results + ); + Ok(()) +} ``` ## Collect / Streaming Exec -DataFusion `DataFrame`s are "lazy", meaning they do not do any processing until they are executed, which allows for additional optimizations. +DataFusion [`DataFrame`]s are "lazy", meaning they do no processing until +they are executed, which allows for additional optimizations. -When you have a `DataFrame`, you can run it in one of three ways: +You can run a `DataFrame` in one of three ways: -1. `collect` which executes the query and buffers all the output into a `Vec` -2. 
`streaming_exec`, which begins executions and returns a `SendableRecordBatchStream` which incrementally computes output on each call to `next()` -3. `cache` which executes the query and buffers the output into a new in memory DataFrame. +1. `collect`: executes the query and buffers all the output into a `Vec` +2. `execute_stream`: begins executions and returns a `SendableRecordBatchStream` which incrementally computes output on each call to `next()` +3. `cache`: executes the query and buffers the output into a new in memory `DataFrame.` -You can just collect all outputs once like: +To collect all outputs into a memory buffer, use the `collect` method: ```rust -let ctx = SessionContext::new(); -let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; -let batches = df.collect().await?; +use datafusion::prelude::*; +use datafusion::error::Result; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + // read the contents of a CSV file into a DataFrame + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + // execute the query and collect the results as a Vec + let batches = df.collect().await?; + for record_batch in batches { + println!("{record_batch:?}"); + } + Ok(()) +} ``` -You can also use stream output to incrementally generate output one `RecordBatch` at a time +Use `execute_stream` to incrementally generate output one `RecordBatch` at a time: ```rust -let ctx = SessionContext::new(); -let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; -let mut stream = df.execute_stream().await?; -while let Some(rb) = stream.next().await { - println!("{rb:?}"); +use datafusion::prelude::*; +use datafusion::error::Result; +use futures::stream::StreamExt; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + // read example.csv file into a DataFrame + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + // begin execution (returns quickly, does not compute results) + let mut stream = df.execute_stream().await?; + // results are returned incrementally as they are computed + while let Some(record_batch) = stream.next().await { + println!("{record_batch:?}"); + } + Ok(()) } ``` # Write DataFrame to Files -You can also serialize `DataFrame` to a file. For now, `Datafusion` supports write `DataFrame` to `csv`, `json` and `parquet`. - -When writing a file, DataFusion will execute the DataFrame and stream the results to a file. +You can also write the contents of a `DataFrame` to a file. When writing a file, +DataFusion executes the `DataFrame` and streams the results to the output. 
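For instance, a minimal sketch of streaming the same CSV input back out to a new CSV file; the output path `example_out.csv` is illustrative, and the trailing `None` stands in for optional format-specific writer options:

```rust
use datafusion::prelude::*;
use datafusion::error::Result;
use datafusion::dataframe::DataFrameWriteOptions;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // read example.csv file into a DataFrame
    let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
    // execution happens here: rows are streamed into `example_out.csv`
    df.write_csv(
        "example_out.csv",
        DataFrameWriteOptions::new(),
        None, // writer_options
    ).await?;
    Ok(())
}
```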
+DataFusion comes with support for writing `csv`, `json` `arrow` `avro`, and +`parquet` files, and supports writing custom file formats via API (see +[`custom_file_format.rs`] for an example) -For example, to write a csv_file +For example, to read a CSV file and write it to a parquet file, use the +[`DataFrame::write_parquet`] method ```rust -let ctx = SessionContext::new(); -// Register the in-memory table containing the data -ctx.register_table("users", Arc::new(mem_table))?; -let dataframe = ctx.sql("SELECT * FROM users;").await?; - -dataframe - .write_csv("user_dataframe.csv", DataFrameWriteOptions::default(), None) - .await; +use datafusion::prelude::*; +use datafusion::error::Result; +use datafusion::dataframe::DataFrameWriteOptions; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + // read example.csv file into a DataFrame + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + // stream the contents of the DataFrame to the `example.parquet` file + df.write_parquet( + "example.parquet", + DataFrameWriteOptions::new(), + None, // writer_options + ).await; + Ok(()) +} ``` -and the file will look like (Example Output): +[`custom_file_format.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/custom_file_format.rs -``` -id,bank_account -1,9000 +The output file will look like (Example Output): + +```sql +> select * from '../datafusion/core/example.parquet'; ++---+---+---+ +| a | b | c | ++---+---+---+ +| 1 | 2 | 3 | ++---+---+---+ ``` -## Transform between LogicalPlan and DataFrame +## Relationship between `LogicalPlan`s and `DataFrame`s -As shown above, `DataFrame` is just a very thin wrapper of `LogicalPlan`, so you can easily go back and forth between them. +The `DataFrame` struct is defined like this: ```rust -// Just combine LogicalPlan with SessionContext and you get a DataFrame -let ctx = SessionContext::new(); -// Register the in-memory table containing the data -ctx.register_table("users", Arc::new(mem_table))?; -let dataframe = ctx.sql("SELECT * FROM users;").await?; +use datafusion::execution::session_state::SessionState; +use datafusion::logical_expr::LogicalPlan; +pub struct DataFrame { + // state required to execute a LogicalPlan + session_state: Box, + // LogicalPlan that describes the computation to perform + plan: LogicalPlan, +} +``` -// get LogicalPlan in dataframe -let plan = dataframe.logical_plan().clone(); +As shown above, `DataFrame` is a thin wrapper of `LogicalPlan`, so you can +easily go back and forth between them. 
-// construct a DataFrame with LogicalPlan -let new_df = DataFrame::new(ctx.state(), plan); +```rust +use datafusion::prelude::*; +use datafusion::error::Result; +use datafusion::logical_expr::LogicalPlanBuilder; + +#[tokio::main] +async fn main() -> Result<()>{ + let ctx = SessionContext::new(); + // read example.csv file into a DataFrame + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + // You can easily get the LogicalPlan from the DataFrame + let (_state, plan) = df.into_parts(); + // Just combine LogicalPlan with SessionContext and you get a DataFrame + // get LogicalPlan in dataframe + let new_df = DataFrame::new(ctx.state(), plan); + Ok(()) +} ``` + +In fact, using the [`DataFrame`]s methods you can create the same +[`LogicalPlan`]s as when using [`LogicalPlanBuilder`]: + +```rust +use datafusion::prelude::*; +use datafusion::error::Result; +use datafusion::logical_expr::LogicalPlanBuilder; + +#[tokio::main] +async fn main() -> Result<()>{ + let ctx = SessionContext::new(); + // read example.csv file into a DataFrame + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + // Create a new DataFrame sorted by `id`, `bank_account` + let new_df = df.select(vec![col("a"), col("b")])? + .sort(vec![col("a")])?; + // Build the same plan using the LogicalPlanBuilder + // Similar to `SELECT a, b FROM example.csv ORDER BY a` + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + let (_state, plan) = df.into_parts(); // get the DataFrame's LogicalPlan + let plan = LogicalPlanBuilder::from(plan) + .project(vec![col("a"), col("b")])? + .sort(vec![col("a")])? + .build()?; + // prove they are the same + assert_eq!(new_df.logical_plan(), &plan); + Ok(()) +} +``` + +[users guide]: ../user-guide/dataframe.md +[pandas dataframe]: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html +[`dataframe`]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html +[`logicalplan`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.LogicalPlan.html +[`logicalplanbuilder`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.LogicalPlanBuilder.html +[`dataframe::write_parquet`]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.write_parquet diff --git a/docs/source/library-user-guide/using-the-sql-api.md b/docs/source/library-user-guide/using-the-sql-api.md index 1a25f078cc2e2..9c32004db4359 100644 --- a/docs/source/library-user-guide/using-the-sql-api.md +++ b/docs/source/library-user-guide/using-the-sql-api.md @@ -29,16 +29,15 @@ using the [`SessionContext::sql`] method. For lower level control such as preventing DDL, you can use [`SessionContext::sql_with_options`] or the [`SessionState`] APIs -[`sessioncontext`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html -[`sessioncontext::sql`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql -[`sessioncontext::sql_with_options`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql_with_options -[`sessionstate`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html - ## Registering Data Sources using `SessionContext::register*` The `SessionContext::register*` methods tell DataFusion the name of the source and how to read data. 
Once registered, you can execute SQL queries -using the `SessionContext::sql` method referring to your data source as a table. +using the [`SessionContext::sql`] method referring to your data source as a table. + +The [`SessionContext::sql`] method returns a `DataFrame` for ease of +use. See the ["Using the DataFrame API"] section for more information on how to +work with DataFrames. ### Read a CSV File @@ -215,3 +214,9 @@ async fn main() -> Result<()> { Ok(()) } ``` + +[`sessioncontext`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html +[`sessioncontext::sql`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql +[`sessioncontext::sql_with_options`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql_with_options +[`sessionstate`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html +["using the dataframe api"]: ../library-user-guide/using-the-dataframe-api.md From 16a3148354e81e1ae4e2aebdd83c07799164ac14 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 21:30:28 -0400 Subject: [PATCH 010/357] Remove redundant `unalias_nested` calls for creating Filter's (#11340) * Remove uncessary unalias_nested calls when creating Filter * simplify --- datafusion/expr/src/logical_plan/plan.rs | 54 ++++--------------- .../optimizer/src/common_subexpr_eliminate.rs | 11 +--- datafusion/optimizer/src/push_down_filter.rs | 4 +- 3 files changed, 13 insertions(+), 56 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 998b5bdcb60c8..bde9655b8a390 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -41,9 +41,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion_common::tree_node::{ - Transformed, TransformedResult, TreeNode, TreeNodeRecursion, -}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; use datafusion_common::{ aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, @@ -645,39 +643,6 @@ impl LogicalPlan { Ok(LogicalPlan::Values(Values { schema, values })) } LogicalPlan::Filter(Filter { predicate, input }) => { - // todo: should this logic be moved to Filter::try_new? - - // filter predicates should not contain aliased expressions so we remove any aliases - // before this logic was added we would have aliases within filters such as for - // benchmark q6: - // - // lineitem.l_shipdate >= Date32(\"8766\") - // AND lineitem.l_shipdate < Date32(\"9131\") - // AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount >= - // Decimal128(Some(49999999999999),30,15) - // AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <= - // Decimal128(Some(69999999999999),30,15) - // AND lineitem.l_quantity < Decimal128(Some(2400),15,2) - - let predicate = predicate - .transform_down(|expr| { - match expr { - Expr::Exists { .. 
} - | Expr::ScalarSubquery(_) - | Expr::InSubquery(_) => { - // subqueries could contain aliases so we don't recurse into those - Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) - } - Expr::Alias(_) => Ok(Transformed::new( - expr.unalias(), - true, - TreeNodeRecursion::Jump, - )), - _ => Ok(Transformed::no(expr)), - } - }) - .data()?; - Filter::try_new(predicate, input).map(LogicalPlan::Filter) } LogicalPlan::Repartition(_) => Ok(self), @@ -878,7 +843,7 @@ impl LogicalPlan { } LogicalPlan::Filter { .. } => { assert_eq!(1, expr.len()); - let predicate = expr.pop().unwrap().unalias_nested().data; + let predicate = expr.pop().unwrap(); Filter::try_new(predicate, Arc::new(inputs.swap_remove(0))) .map(LogicalPlan::Filter) @@ -2117,6 +2082,9 @@ pub struct Filter { impl Filter { /// Create a new filter operator. + /// + /// Notes: as Aliases have no effect on the output of a filter operator, + /// they are removed from the predicate expression. pub fn try_new(predicate: Expr, input: Arc) -> Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan @@ -2940,7 +2908,7 @@ mod tests { use crate::logical_plan::table_scan; use crate::{col, exists, in_subquery, lit, placeholder, GroupingSet}; - use datafusion_common::tree_node::TreeNodeVisitor; + use datafusion_common::tree_node::{TransformedResult, TreeNodeVisitor}; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; use crate::test::function_stub::count; @@ -3500,11 +3468,8 @@ digraph { })); let col = schema.field_names()[0].clone(); - let filter = Filter::try_new( - Expr::Column(col.into()).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), - scan, - ) - .unwrap(); + let filter = + Filter::try_new(Expr::Column(col.into()).eq(lit(1i32)), scan).unwrap(); assert!(filter.is_scalar()); } @@ -3522,8 +3487,7 @@ digraph { .build() .unwrap(); - let external_filter = - col("foo").eq(Expr::Literal(ScalarValue::Boolean(Some(true)))); + let external_filter = col("foo").eq(lit(true)); // after transformation, because plan is not the same anymore, // the parent plan is built again with call to LogicalPlan::with_new_inputs -> with_new_exprs diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 4a4933fe9cfdb..e18d8bc91bf60 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -342,16 +342,9 @@ impl CommonSubexprEliminate { let input = unwrap_arc(input); let expr = vec![predicate]; self.try_unary_plan(expr, input, config)? - .transform_data(|(mut new_expr, new_input)| { + .map_data(|(mut new_expr, new_input)| { assert_eq!(new_expr.len(), 1); // passed in vec![predicate] - let new_predicate = new_expr - .pop() - .unwrap() - .unalias_nested() - .update_data(|new_predicate| (new_predicate, new_input)); - Ok(new_predicate) - })? 
- .map_data(|(new_predicate, new_input)| { + let new_predicate = new_expr.pop().unwrap(); Filter::try_new(new_predicate, Arc::new(new_input)) .map(LogicalPlan::Filter) }) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 1c3186b762b71..0a3bae154bd64 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -761,11 +761,11 @@ impl OptimizerRule for PushDownFilter { // Push down non-unnest filter predicate // Unnest - // Unenst Input (Projection) + // Unnest Input (Projection) // -> rewritten to // Unnest // Filter - // Unenst Input (Projection) + // Unnest Input (Projection) let unnest_input = std::mem::take(&mut unnest.input); From 146b679aa19c7749cc73d0c27440419d6498142b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 10 Jul 2024 16:01:53 +0800 Subject: [PATCH 011/357] Enable `clone_on_ref_ptr` clippy lint on optimizer (#11346) * Enable clone_on_ref_ptr clippy lint on optimizer * Fix lints * fmt --- datafusion/optimizer/src/analyzer/subquery.rs | 5 ++-- .../optimizer/src/analyzer/type_coercion.rs | 8 +++--- .../optimizer/src/common_subexpr_eliminate.rs | 2 +- datafusion/optimizer/src/decorrelate.rs | 15 ++++++----- .../src/decorrelate_predicate_subquery.rs | 4 +-- .../optimizer/src/eliminate_cross_join.rs | 6 ++--- datafusion/optimizer/src/eliminate_filter.rs | 3 ++- datafusion/optimizer/src/eliminate_limit.rs | 3 ++- .../optimizer/src/eliminate_nested_union.rs | 2 +- .../optimizer/src/eliminate_one_union.rs | 2 +- .../optimizer/src/eliminate_outer_join.rs | 2 +- .../src/extract_equijoin_predicate.rs | 4 +-- datafusion/optimizer/src/lib.rs | 2 ++ .../optimizer/src/optimize_projections/mod.rs | 12 ++++----- datafusion/optimizer/src/optimizer.rs | 8 +++--- datafusion/optimizer/src/plan_signature.rs | 4 +-- .../optimizer/src/propagate_empty_relation.rs | 26 +++++++++---------- datafusion/optimizer/src/push_down_filter.rs | 10 +++---- .../optimizer/src/scalar_subquery_to_join.rs | 2 +- .../simplify_expressions/expr_simplifier.rs | 11 ++++---- .../src/single_distinct_to_groupby.rs | 2 +- datafusion/optimizer/src/test/mod.rs | 2 +- .../src/unwrap_cast_in_comparison.rs | 2 +- 23 files changed, 73 insertions(+), 64 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 5725a725e64a4..db39f8f7737d4 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -16,6 +16,7 @@ // under the License. 
use std::ops::Deref; +use std::sync::Arc; use crate::analyzer::check_plan; use crate::utils::collect_subquery_cols; @@ -245,7 +246,7 @@ fn check_aggregation_in_scalar_subquery( if !agg.group_expr.is_empty() { let correlated_exprs = get_correlated_expressions(inner_plan)?; let inner_subquery_cols = - collect_subquery_cols(&correlated_exprs, agg.input.schema().clone())?; + collect_subquery_cols(&correlated_exprs, Arc::clone(agg.input.schema()))?; let mut group_columns = agg .group_expr .iter() @@ -375,7 +376,7 @@ mod test { _inputs: Vec, ) -> Result { Ok(Self { - empty_schema: self.empty_schema.clone(), + empty_schema: Arc::clone(&self.empty_schema), }) } } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 6c08b3e998b3d..3cab474df84e0 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -656,7 +656,7 @@ fn coerce_arguments_for_fun( .map(|expr| { let data_type = expr.get_type(schema).unwrap(); if let DataType::FixedSizeList(field, _) = data_type { - let to_type = DataType::List(field.clone()); + let to_type = DataType::List(Arc::clone(&field)); expr.cast_to(&to_type, schema) } else { Ok(expr) @@ -1265,8 +1265,10 @@ mod test { signature: Signature::variadic(vec![Utf8], Volatility::Immutable), }) .call(args.to_vec()); - let plan = - LogicalPlan::Projection(Projection::try_new(vec![expr], empty.clone())?); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr], + Arc::clone(&empty), + )?); let expected = "Projection: TestScalarUDF(a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index e18d8bc91bf60..721987b917d4c 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1385,7 +1385,7 @@ mod test { "my_agg", Signature::exact(vec![DataType::UInt32], Volatility::Stable), return_type.clone(), - accumulator.clone(), + Arc::clone(&accumulator), vec![Field::new("value", DataType::UInt32, true)], ))), vec![inner], diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 5f8e0a85215aa..c998e8442548c 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -19,6 +19,7 @@ use std::collections::{BTreeSet, HashMap}; use std::ops::Deref; +use std::sync::Arc; use crate::simplify_expressions::ExprSimplifier; use crate::utils::collect_subquery_cols; @@ -147,7 +148,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { } fn f_up(&mut self, plan: LogicalPlan) -> Result> { - let subquery_schema = plan.schema().clone(); + let subquery_schema = Arc::clone(plan.schema()); match &plan { LogicalPlan::Filter(plan_filter) => { let subquery_filter_exprs = split_conjunction(&plan_filter.predicate); @@ -172,7 +173,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { if let Some(expr) = conjunction(subquery_filters.clone()) { filter_exprs_evaluation_result_on_empty_batch( &expr, - plan_filter.input.schema().clone(), + Arc::clone(plan_filter.input.schema()), expr_result_map, &mut expr_result_map_for_count_bug, )? 
@@ -230,7 +231,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { { proj_exprs_evaluation_result_on_empty_batch( &projection.expr, - projection.input.schema().clone(), + Arc::clone(projection.input.schema()), expr_result_map, &mut expr_result_map_for_count_bug, )?; @@ -276,7 +277,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { { agg_exprs_evaluation_result_on_empty_batch( &aggregate.aggr_expr, - aggregate.input.schema().clone(), + Arc::clone(aggregate.input.schema()), &mut expr_result_map_for_count_bug, )?; if !expr_result_map_for_count_bug.is_empty() { @@ -332,7 +333,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { if limit.fetch.filter(|limit_row| *limit_row == 0).is_some() { LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: limit.input.schema().clone(), + schema: Arc::clone(limit.input.schema()), }) } else { LogicalPlanBuilder::from((*limit.input).clone()).build()? @@ -456,7 +457,7 @@ fn agg_exprs_evaluation_result_on_empty_batch( let result_expr = result_expr.unalias(); let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema.clone()); + let info = SimplifyContext::new(&props).with_schema(Arc::clone(&schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; if matches!(result_expr, Expr::Literal(ScalarValue::Int64(_))) { @@ -492,7 +493,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( if result_expr.ne(expr) { let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema.clone()); + let info = SimplifyContext::new(&props).with_schema(Arc::clone(&schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; let expr_name = match expr { diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 81d6dc863af6b..4e3ca7e33a2eb 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -571,7 +571,7 @@ mod tests { ); let plan = LogicalPlanBuilder::from(scan_tpch_table("customer")) .filter( - in_subquery(col("customer.c_custkey"), orders.clone()) + in_subquery(col("customer.c_custkey"), Arc::clone(&orders)) .and(in_subquery(col("customer.c_custkey"), orders)), )? .project(vec![col("customer.c_custkey")])? @@ -1358,7 +1358,7 @@ mod tests { ); let plan = LogicalPlanBuilder::from(scan_tpch_table("customer")) - .filter(exists(orders.clone()).and(exists(orders)))? + .filter(exists(Arc::clone(&orders)).and(exists(orders)))? .project(vec![col("customer.c_custkey")])? 
.build()?; diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 6d6f84373a36b..729c45426ff29 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -86,7 +86,7 @@ impl OptimizerRule for EliminateCrossJoin { plan: LogicalPlan, config: &dyn OptimizerConfig, ) -> Result> { - let plan_schema = plan.schema().clone(); + let plan_schema = Arc::clone(plan.schema()); let mut possible_join_keys = JoinKeySet::new(); let mut all_inputs: Vec = vec![]; @@ -155,7 +155,7 @@ impl OptimizerRule for EliminateCrossJoin { if &plan_schema != left.schema() { left = LogicalPlan::Projection(Projection::new_from_schema( Arc::new(left), - plan_schema.clone(), + Arc::clone(&plan_schema), )); } @@ -420,7 +420,7 @@ mod tests { }; fn assert_optimized_plan_eq(plan: LogicalPlan, expected: Vec<&str>) { - let starting_schema = plan.schema().clone(); + let starting_schema = Arc::clone(plan.schema()); let rule = EliminateCrossJoin::new(); let transformed_plan = rule.rewrite(plan, &OptimizerContext::new()).unwrap(); assert!(transformed_plan.transformed, "failed to optimize plan"); diff --git a/datafusion/optimizer/src/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs index 7c873b411d592..2d8d77b89ddc8 100644 --- a/datafusion/optimizer/src/eliminate_filter.rs +++ b/datafusion/optimizer/src/eliminate_filter.rs @@ -21,6 +21,7 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::{EmptyRelation, Expr, Filter, LogicalPlan}; +use std::sync::Arc; use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; @@ -68,7 +69,7 @@ impl OptimizerRule for EliminateFilter { Some(false) | None => Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, - schema: input.schema().clone(), + schema: Arc::clone(input.schema()), }, ))), }, diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index b0a75fa47c277..165834e759752 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -21,6 +21,7 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::Result; use datafusion_expr::logical_plan::{tree_node::unwrap_arc, EmptyRelation, LogicalPlan}; +use std::sync::Arc; /// Optimizer rule to replace `LIMIT 0` or `LIMIT` whose ancestor LIMIT's skip is /// greater than or equal to current's fetch @@ -67,7 +68,7 @@ impl OptimizerRule for EliminateLimit { return Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, - schema: limit.input.schema().clone(), + schema: Arc::clone(limit.input.schema()), }, ))); } diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 3732f7ed90c8a..c8ae937e128a6 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -79,7 +79,7 @@ impl OptimizerRule for EliminateNestedUnion { Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( Arc::new(LogicalPlan::Union(Union { inputs: inputs.into_iter().map(Arc::new).collect_vec(), - schema: schema.clone(), + schema: Arc::clone(&schema), })), )))) } diff --git a/datafusion/optimizer/src/eliminate_one_union.rs 
b/datafusion/optimizer/src/eliminate_one_union.rs index edf6b72d7e178..5e37b8cf7c1fa 100644 --- a/datafusion/optimizer/src/eliminate_one_union.rs +++ b/datafusion/optimizer/src/eliminate_one_union.rs @@ -110,7 +110,7 @@ mod tests { &table_scan(Some("table"), &schema(), None)?.build()?, &schema().to_dfschema()?, )?; - let schema = table_plan.schema().clone(); + let schema = Arc::clone(table_plan.schema()); let single_union_plan = LogicalPlan::Union(Union { inputs: vec![Arc::new(table_plan)], schema, diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs index 13c483c6dfcca..12534e058152e 100644 --- a/datafusion/optimizer/src/eliminate_outer_join.rs +++ b/datafusion/optimizer/src/eliminate_outer_join.rs @@ -118,7 +118,7 @@ impl OptimizerRule for EliminateOuterJoin { join_constraint: join.join_constraint, on: join.on.clone(), filter: join.filter.clone(), - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), null_equals_null: join.null_equals_null, })); Filter::try_new(filter.predicate, new_join) diff --git a/datafusion/optimizer/src/extract_equijoin_predicate.rs b/datafusion/optimizer/src/extract_equijoin_predicate.rs index 87d205139e8e9..0dae777ab5bdf 100644 --- a/datafusion/optimizer/src/extract_equijoin_predicate.rs +++ b/datafusion/optimizer/src/extract_equijoin_predicate.rs @@ -357,8 +357,8 @@ mod tests { let t1 = test_table_scan_with_name("t1")?; let t2 = test_table_scan_with_name("t2")?; - let t1_schema = t1.schema().clone(); - let t2_schema = t2.schema().clone(); + let t1_schema = Arc::clone(t1.schema()); + let t2_schema = Arc::clone(t2.schema()); // filter: t1.a + CAST(Int64(1), UInt32) = t2.a + CAST(Int64(2), UInt32) as t1.a + 1 = t2.a + 2 let filter = Expr::eq( diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index a6a9e5cf26eaf..332d3e9fe54e9 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! # DataFusion Optimizer //! 
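The lint added above is what drives the mechanical `x.clone()` → `Arc::clone(&x)` rewrites in the rest of this patch: it forces reference-counted clones to be spelled out explicitly, so a reader can tell a cheap pointer copy from a potentially expensive deep clone. A standalone sketch of the idiom, not tied to any DataFusion type:

```rust
use std::sync::Arc;

fn main() {
    let schema = Arc::new(vec!["a", "b", "c"]);

    // With `clippy::clone_on_ref_ptr` denied, `schema.clone()` is rejected
    // because it reads like a clone of the underlying data. Writing
    // `Arc::clone(&schema)` makes clear that only the reference count is
    // incremented and the allocation is shared.
    let shared = Arc::clone(&schema);

    assert_eq!(Arc::strong_count(&schema), 2);
    println!("{shared:?}");
}
```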
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 4684dbd3b043a..cae2a7b2cad2f 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -205,7 +205,7 @@ fn optimize_projections( }); } LogicalPlan::Window(window) => { - let input_schema = window.input.schema().clone(); + let input_schema = Arc::clone(window.input.schema()); // Split parent requirements to child and window expression sections: let n_input_fields = input_schema.fields().len(); // Offset window expression indices so that they point to valid @@ -881,7 +881,7 @@ mod tests { Ok(Self { exprs, input: Arc::new(inputs.swap_remove(0)), - schema: self.schema.clone(), + schema: Arc::clone(&self.schema), }) } @@ -949,7 +949,7 @@ mod tests { exprs, left_child: Arc::new(inputs.remove(0)), right_child: Arc::new(inputs.remove(0)), - schema: self.schema.clone(), + schema: Arc::clone(&self.schema), }) } @@ -1256,7 +1256,7 @@ mod tests { let table_scan = test_table_scan()?; let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new(NoOpUserDefined::new( - table_scan.schema().clone(), + Arc::clone(table_scan.schema()), Arc::new(table_scan.clone()), )), }); @@ -1281,7 +1281,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new( NoOpUserDefined::new( - table_scan.schema().clone(), + Arc::clone(table_scan.schema()), Arc::new(table_scan.clone()), ) .with_exprs(exprs), @@ -1316,7 +1316,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new( NoOpUserDefined::new( - table_scan.schema().clone(), + Arc::clone(table_scan.schema()), Arc::new(table_scan.clone()), ) .with_exprs(exprs), diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 14e5ac141eeb6..93923a4e1e74a 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -205,7 +205,7 @@ impl OptimizerConfig for OptimizerContext { } fn alias_generator(&self) -> Arc { - self.alias_generator.clone() + Arc::clone(&self.alias_generator) } fn options(&self) -> &ConfigOptions { @@ -381,7 +381,7 @@ impl Optimizer { .skip_failed_rules .then(|| new_plan.clone()); - let starting_schema = new_plan.schema().clone(); + let starting_schema = Arc::clone(new_plan.schema()); let result = match rule.apply_order() { // optimizer handles recursion @@ -579,7 +579,7 @@ mod tests { let config = OptimizerContext::new().with_skip_failing_rules(false); let input = Arc::new(test_table_scan()?); - let input_schema = input.schema().clone(); + let input_schema = Arc::clone(input.schema()); let plan = LogicalPlan::Projection(Projection::try_new_with_schema( vec![col("a"), col("b"), col("c")], @@ -760,7 +760,7 @@ mod tests { } Ok(Transformed::yes(LogicalPlan::Projection( - Projection::try_new(exprs, projection.input.clone())?, + Projection::try_new(exprs, Arc::clone(&projection.input))?, ))) } } diff --git a/datafusion/optimizer/src/plan_signature.rs b/datafusion/optimizer/src/plan_signature.rs index d22795797478c..73e6b418272a9 100644 --- a/datafusion/optimizer/src/plan_signature.rs +++ b/datafusion/optimizer/src/plan_signature.rs @@ -100,7 +100,7 @@ mod tests { let one_node_plan = Arc::new(LogicalPlan::EmptyRelation(datafusion_expr::EmptyRelation { produce_one_row: false, - schema: schema.clone(), + schema: Arc::clone(&schema), })); assert_eq!(1, get_node_number(&one_node_plan).get()); @@ -112,7 +112,7 @@ mod tests { assert_eq!(2, 
get_node_number(&two_node_plan).get()); let five_node_plan = Arc::new(LogicalPlan::Union(datafusion_expr::Union { - inputs: vec![two_node_plan.clone(), two_node_plan], + inputs: vec![Arc::clone(&two_node_plan), two_node_plan], schema, })); diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 88bd1b17883b1..91044207c4e11 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -79,7 +79,7 @@ impl OptimizerRule for PropagateEmptyRelation { return Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, - schema: plan.schema().clone(), + schema: Arc::clone(plan.schema()), }, ))); } @@ -99,43 +99,43 @@ impl OptimizerRule for PropagateEmptyRelation { JoinType::Full if left_empty && right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), JoinType::Inner if left_empty || right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), JoinType::Left if left_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), JoinType::Right if right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), JoinType::LeftSemi if left_empty || right_empty => Ok( Transformed::yes(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), })), ), JoinType::RightSemi if left_empty || right_empty => Ok( Transformed::yes(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), })), ), JoinType::LeftAnti if left_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), JoinType::LeftAnti if right_empty => { @@ -147,7 +147,7 @@ impl OptimizerRule for PropagateEmptyRelation { JoinType::RightAnti if right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: join.schema.clone(), + schema: Arc::clone(&join.schema), }), )), _ => Ok(Transformed::no(plan)), @@ -178,7 +178,7 @@ impl OptimizerRule for PropagateEmptyRelation { Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, - schema: plan.schema().clone(), + schema: Arc::clone(plan.schema()), }, ))) } else if new_inputs.len() == 1 { @@ -191,14 +191,14 @@ impl OptimizerRule for PropagateEmptyRelation { Ok(Transformed::yes(LogicalPlan::Projection( Projection::new_from_schema( Arc::new(child), - plan.schema().clone(), + Arc::clone(plan.schema()), ), ))) } } else { Ok(Transformed::yes(LogicalPlan::Union(Union { inputs: new_inputs, - schema: union.schema.clone(), + schema: Arc::clone(&union.schema), }))) } } @@ -232,7 +232,7 @@ fn empty_child(plan: &LogicalPlan) -> Result> { if !empty.produce_one_row { Ok(Some(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: plan.schema().clone(), + schema: Arc::clone(plan.schema()), }))) } else { Ok(None) diff --git 
a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 0a3bae154bd64..20e2ac07dffd8 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -652,7 +652,7 @@ impl OptimizerRule for PushDownFilter { return push_down_join(join, None); }; - let plan_schema = plan.schema().clone(); + let plan_schema = Arc::clone(plan.schema()); let LogicalPlan::Filter(mut filter) = plan else { return Ok(Transformed::no(plan)); @@ -1498,7 +1498,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new(NoopPlan { input: vec![table_scan.clone()], - schema: table_scan.schema().clone(), + schema: Arc::clone(table_scan.schema()), }), }); let plan = LogicalPlanBuilder::from(custom_plan) @@ -1514,7 +1514,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new(NoopPlan { input: vec![table_scan.clone()], - schema: table_scan.schema().clone(), + schema: Arc::clone(table_scan.schema()), }), }); let plan = LogicalPlanBuilder::from(custom_plan) @@ -1531,7 +1531,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new(NoopPlan { input: vec![table_scan.clone(), table_scan.clone()], - schema: table_scan.schema().clone(), + schema: Arc::clone(table_scan.schema()), }), }); let plan = LogicalPlanBuilder::from(custom_plan) @@ -1548,7 +1548,7 @@ mod tests { let custom_plan = LogicalPlan::Extension(Extension { node: Arc::new(NoopPlan { input: vec![table_scan.clone(), table_scan.clone()], - schema: table_scan.schema().clone(), + schema: Arc::clone(table_scan.schema()), }), }); let plan = LogicalPlanBuilder::from(custom_plan) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 0333cc8dde368..35691847fb8e9 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -413,7 +413,7 @@ mod tests { let plan = LogicalPlanBuilder::from(scan_tpch_table("customer")) .filter( lit(1) - .lt(scalar_subquery(orders.clone())) + .lt(scalar_subquery(Arc::clone(&orders))) .and(lit(1).lt(scalar_subquery(orders))), )? .project(vec![col("customer.c_custkey")])? 
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 36dd85ac96e1f..17855e17bef8b 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1807,8 +1807,9 @@ mod tests { fn basic_coercion() { let schema = test_schema(); let props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema.clone())); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&props).with_schema(Arc::clone(&schema)), + ); // Note expr type is int32 (not int64) // (1i64 + 2i32) < i @@ -3340,15 +3341,15 @@ mod tests { assert_eq!( simplify(in_list( col("c1"), - vec![scalar_subquery(subquery.clone())], + vec![scalar_subquery(Arc::clone(&subquery))], false )), - in_subquery(col("c1"), subquery.clone()) + in_subquery(col("c1"), Arc::clone(&subquery)) ); assert_eq!( simplify(in_list( col("c1"), - vec![scalar_subquery(subquery.clone())], + vec![scalar_subquery(Arc::clone(&subquery))], true )), not_in_subquery(col("c1"), subquery) diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 7c66d659cbaf5..f2b4abdd6cbd5 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -279,7 +279,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { let alias_str = format!("alias{}", index); inner_aggr_exprs.push( Expr::AggregateFunction(AggregateFunction::new_udf( - udf.clone(), + Arc::clone(&udf), args, false, None, diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index 2c7e8644026ed..4dccb42941dd0 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -176,7 +176,7 @@ pub fn assert_optimized_plan_eq( // Apply the rule once let opt_context = OptimizerContext::new().with_max_passes(1); - let optimizer = Optimizer::with_rules(vec![rule.clone()]); + let optimizer = Optimizer::with_rules(vec![Arc::clone(&rule)]); let optimized_plan = optimizer.optimize(plan, &opt_context, observe)?; let formatted_plan = format!("{optimized_plan:?}"); assert_eq!(formatted_plan, expected); diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index de471d59c4660..3447082525597 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -832,7 +832,7 @@ mod tests { fn optimize_test(expr: Expr, schema: &DFSchemaRef) -> Expr { let mut expr_rewriter = UnwrapCastExprRewriter { - schema: schema.clone(), + schema: Arc::clone(schema), }; expr.rewrite(&mut expr_rewriter).data().unwrap() } From 9a8f8b7188fba8dcba52028443b90556aeff7f5f Mon Sep 17 00:00:00 2001 From: Matthew Cramerus <8771538+suremarc@users.noreply.github.com> Date: Wed, 10 Jul 2024 08:14:28 -0500 Subject: [PATCH 012/357] fix: Fix eq properties regression from #10434 (#11363) * discover new orderings when constants are added * more comments * reduce nesting + describe argument * lint? 
--- .../src/equivalence/properties.rs | 183 +++++++++++------- 1 file changed, 113 insertions(+), 70 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index d9d19c0bcf47e..8c327fbaf4098 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -223,56 +223,11 @@ impl EquivalenceProperties { } } - // Discover new valid orderings in light of the new equality. For a discussion, see: - // https://github.com/apache/datafusion/issues/9812 - let mut new_orderings = vec![]; - for ordering in self.normalized_oeq_class().iter() { - let expressions = if left.eq(&ordering[0].expr) { - // Left expression is leading ordering - Some((ordering[0].options, right)) - } else if right.eq(&ordering[0].expr) { - // Right expression is leading ordering - Some((ordering[0].options, left)) - } else { - None - }; - if let Some((leading_ordering, other_expr)) = expressions { - // Currently, we only handle expressions with a single child. - // TODO: It should be possible to handle expressions orderings like - // f(a, b, c), a, b, c if f is monotonic in all arguments. - // First expression after leading ordering - if let Some(next_expr) = ordering.get(1) { - let children = other_expr.children(); - if children.len() == 1 - && children[0].eq(&next_expr.expr) - && SortProperties::Ordered(leading_ordering) - == other_expr - .get_properties(&[ExprProperties { - sort_properties: SortProperties::Ordered( - leading_ordering, - ), - range: Interval::make_unbounded( - &other_expr.data_type(&self.schema)?, - )?, - }])? - .sort_properties - { - // Assume existing ordering is [a ASC, b ASC] - // When equality a = f(b) is given, If we know that given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid, - // then we can deduce that ordering `[b ASC]` is also valid. - // Hence, ordering `[b ASC]` can be added to the state as valid ordering. - // (e.g. existing ordering where leading ordering is removed) - new_orderings.push(ordering[1..].to_vec()); - } - } - } - } - if !new_orderings.is_empty() { - self.oeq_class.add_new_orderings(new_orderings); - } - // Add equal expressions to the state self.eq_group.add_equal_conditions(left, right); + + // Discover any new orderings + self.discover_new_orderings(left)?; Ok(()) } @@ -304,9 +259,78 @@ impl EquivalenceProperties { self.constants.push(const_expr); } } + + for ordering in self.normalized_oeq_class().iter() { + if let Err(e) = self.discover_new_orderings(&ordering[0].expr) { + log::debug!("error discovering new orderings: {e}"); + } + } + self } + // Discover new valid orderings in light of a new equality. + // Accepts a single argument (`expr`) which is used to determine + // which orderings should be updated. + // When constants or equivalence classes are changed, there may be new orderings + // that can be discovered with the new equivalence properties. 
+ // For a discussion, see: https://github.com/apache/datafusion/issues/9812 + fn discover_new_orderings(&mut self, expr: &Arc) -> Result<()> { + let normalized_expr = self.eq_group().normalize_expr(Arc::clone(expr)); + let eq_class = self + .eq_group + .classes + .iter() + .find_map(|class| { + class + .contains(&normalized_expr) + .then(|| class.clone().into_vec()) + }) + .unwrap_or_else(|| vec![Arc::clone(&normalized_expr)]); + + let mut new_orderings: Vec = vec![]; + for (ordering, next_expr) in self + .normalized_oeq_class() + .iter() + .filter(|ordering| ordering[0].expr.eq(&normalized_expr)) + // First expression after leading ordering + .filter_map(|ordering| Some(ordering).zip(ordering.get(1))) + { + let leading_ordering = ordering[0].options; + // Currently, we only handle expressions with a single child. + // TODO: It should be possible to handle expressions orderings like + // f(a, b, c), a, b, c if f is monotonic in all arguments. + for equivalent_expr in &eq_class { + let children = equivalent_expr.children(); + if children.len() == 1 + && children[0].eq(&next_expr.expr) + && SortProperties::Ordered(leading_ordering) + == equivalent_expr + .get_properties(&[ExprProperties { + sort_properties: SortProperties::Ordered( + leading_ordering, + ), + range: Interval::make_unbounded( + &equivalent_expr.data_type(&self.schema)?, + )?, + }])? + .sort_properties + { + // Assume existing ordering is [a ASC, b ASC] + // When equality a = f(b) is given, If we know that given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid, + // then we can deduce that ordering `[b ASC]` is also valid. + // Hence, ordering `[b ASC]` can be added to the state as valid ordering. + // (e.g. existing ordering where leading ordering is removed) + new_orderings.push(ordering[1..].to_vec()); + break; + } + } + } + + self.oeq_class.add_new_orderings(new_orderings); + Ok(()) + } + /// Updates the ordering equivalence group within assuming that the table /// is re-sorted according to the argument `sort_exprs`. Note that constants /// and equivalence classes are unchanged as they are unaffected by a re-sort. @@ -2454,30 +2478,49 @@ mod tests { ]; for case in cases { - let mut properties = base_properties - .clone() - .add_constants(case.constants.into_iter().map(ConstExpr::from)); - for [left, right] in &case.equal_conditions { - properties.add_equal_conditions(left, right)? - } - - let sort = case - .sort_columns - .iter() - .map(|&name| { - col(name, &schema).map(|col| PhysicalSortExpr { - expr: col, - options: SortOptions::default(), + // Construct the equivalence properties in different orders + // to exercise different code paths + // (The resulting properties _should_ be the same) + for properties in [ + // Equal conditions before constants + { + let mut properties = base_properties.clone(); + for [left, right] in &case.equal_conditions { + properties.add_equal_conditions(left, right)? + } + properties.add_constants( + case.constants.iter().cloned().map(ConstExpr::from), + ) + }, + // Constants before equal conditions + { + let mut properties = base_properties.clone().add_constants( + case.constants.iter().cloned().map(ConstExpr::from), + ); + for [left, right] in &case.equal_conditions { + properties.add_equal_conditions(left, right)? 
+ } + properties + }, + ] { + let sort = case + .sort_columns + .iter() + .map(|&name| { + col(name, &schema).map(|col| PhysicalSortExpr { + expr: col, + options: SortOptions::default(), + }) }) - }) - .collect::>>()?; + .collect::>>()?; - assert_eq!( - properties.ordering_satisfy(&sort), - case.should_satisfy_ordering, - "failed test '{}'", - case.name - ); + assert_eq!( + properties.ordering_satisfy(&sort), + case.should_satisfy_ordering, + "failed test '{}'", + case.name + ); + } } Ok(()) From d99002cf0b39843afb9c224bbb880d2266acefc6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 09:24:55 -0400 Subject: [PATCH 013/357] Update termtree requirement from 0.4.1 to 0.5.0 (#11383) Updates the requirements on [termtree](https://github.com/rust-cli/termtree) to permit the latest version. - [Changelog](https://github.com/rust-cli/termtree/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-cli/termtree/compare/v0.4.1...v0.5.0) --- updated-dependencies: - dependency-name: termtree dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- datafusion/physical-plan/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 4292f95fe4061..f5f756417ebf8 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -66,7 +66,7 @@ tokio = { workspace = true } [dev-dependencies] rstest = { workspace = true } rstest_reuse = "0.7.0" -termtree = "0.4.1" +termtree = "0.5.0" tokio = { workspace = true, features = [ "rt-multi-thread", "fs", From b96186fdef1ff410663ec8fce41186c018f8e09a Mon Sep 17 00:00:00 2001 From: Oleks V Date: Wed, 10 Jul 2024 08:09:51 -0700 Subject: [PATCH 014/357] Introduce `resources_err!` error macro (#11374) --- datafusion/common/src/error.rs | 3 +++ datafusion/execution/src/disk_manager.rs | 6 +++--- datafusion/execution/src/memory_pool/pool.rs | 5 +++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index b1fdb652af481..9be662ca283e6 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -553,6 +553,9 @@ make_error!(config_err, config_datafusion_err, Configuration); // Exposes a macro to create `DataFusionError::Substrait` with optional backtrace make_error!(substrait_err, substrait_datafusion_err, Substrait); +// Exposes a macro to create `DataFusionError::ResourcesExhausted` with optional backtrace +make_error!(resources_err, resources_datafusion_err, ResourcesExhausted); + // Exposes a macro to create `DataFusionError::SQL` with optional backtrace #[macro_export] macro_rules! sql_datafusion_err { diff --git a/datafusion/execution/src/disk_manager.rs b/datafusion/execution/src/disk_manager.rs index cca25c7c3e885..c98d7e5579f0f 100644 --- a/datafusion/execution/src/disk_manager.rs +++ b/datafusion/execution/src/disk_manager.rs @@ -18,7 +18,7 @@ //! Manages files generated during query execution, files are //! hashed among the directories listed in RuntimeConfig::local_dirs. 
-use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{resources_datafusion_err, DataFusionError, Result}; use log::debug; use parking_lot::Mutex; use rand::{thread_rng, Rng}; @@ -119,9 +119,9 @@ impl DiskManager { ) -> Result { let mut guard = self.local_dirs.lock(); let local_dirs = guard.as_mut().ok_or_else(|| { - DataFusionError::ResourcesExhausted(format!( + resources_datafusion_err!( "Memory Exhausted while {request_description} (DiskManager is disabled)" - )) + ) })?; // Create a temporary directory if needed diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index 4a491630fe205..fd7724f3076c4 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -16,7 +16,7 @@ // under the License. use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{resources_datafusion_err, DataFusionError, Result}; use log::debug; use parking_lot::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -231,12 +231,13 @@ impl MemoryPool for FairSpillPool { } } +#[inline(always)] fn insufficient_capacity_err( reservation: &MemoryReservation, additional: usize, available: usize, ) -> DataFusionError { - DataFusionError::ResourcesExhausted(format!("Failed to allocate additional {} bytes for {} with {} bytes already allocated - maximum available is {}", additional, reservation.registration.consumer.name, reservation.size, available)) + resources_datafusion_err!("Failed to allocate additional {} bytes for {} with {} bytes already allocated - maximum available is {}", additional, reservation.registration.consumer.name, reservation.size, available) } #[cfg(test)] From 585504a31fd7d9a44c97f3f19af42bace08b8cc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 11 Jul 2024 00:58:53 +0800 Subject: [PATCH 015/357] Enable clone_on_ref_ptr clippy lint on common (#11384) --- datafusion/common-runtime/src/lib.rs | 2 + datafusion/common/src/dfschema.rs | 12 +-- datafusion/common/src/hash_utils.rs | 19 ++-- datafusion/common/src/lib.rs | 2 + datafusion/common/src/scalar/mod.rs | 97 ++++++++++--------- .../common/src/scalar/struct_builder.rs | 2 +- datafusion/common/src/utils/mod.rs | 5 +- 7 files changed, 77 insertions(+), 62 deletions(-) diff --git a/datafusion/common-runtime/src/lib.rs b/datafusion/common-runtime/src/lib.rs index e8624163f2240..8145bb110464e 100644 --- a/datafusion/common-runtime/src/lib.rs +++ b/datafusion/common-runtime/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
+// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] pub mod common; diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 3c2cc89fc0142..7598cbc4d86a0 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -211,7 +211,7 @@ impl DFSchema { schema: &SchemaRef, ) -> Result { let dfschema = Self { - inner: schema.clone(), + inner: Arc::clone(schema), field_qualifiers: qualifiers, functional_dependencies: FunctionalDependencies::empty(), }; @@ -311,7 +311,7 @@ impl DFSchema { }; if !duplicated_field { // self.inner.fields.push(field.clone()); - schema_builder.push(field.clone()); + schema_builder.push(Arc::clone(field)); qualifiers.push(qualifier.cloned()); } } @@ -1276,7 +1276,7 @@ mod tests { let arrow_schema_ref = Arc::new(arrow_schema.clone()); let df_schema = DFSchema { - inner: arrow_schema_ref.clone(), + inner: Arc::clone(&arrow_schema_ref), field_qualifiers: vec![None; arrow_schema_ref.fields.len()], functional_dependencies: FunctionalDependencies::empty(), }; @@ -1284,7 +1284,7 @@ mod tests { { let arrow_schema = arrow_schema.clone(); - let arrow_schema_ref = arrow_schema_ref.clone(); + let arrow_schema_ref = Arc::clone(&arrow_schema_ref); assert_eq!(df_schema, arrow_schema.to_dfschema().unwrap()); assert_eq!(df_schema, arrow_schema_ref.to_dfschema().unwrap()); @@ -1292,7 +1292,7 @@ mod tests { { let arrow_schema = arrow_schema.clone(); - let arrow_schema_ref = arrow_schema_ref.clone(); + let arrow_schema_ref = Arc::clone(&arrow_schema_ref); assert_eq!(df_schema_ref, arrow_schema.to_dfschema_ref().unwrap()); assert_eq!(df_schema_ref, arrow_schema_ref.to_dfschema_ref().unwrap()); @@ -1322,7 +1322,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![a_field, b_field])); let df_schema = DFSchema { - inner: schema.clone(), + inner: Arc::clone(&schema), field_qualifiers: vec![None; schema.fields.len()], functional_dependencies: FunctionalDependencies::empty(), }; diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index c972536c4d23e..c8adae34f6455 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -244,7 +244,7 @@ fn hash_list_array( where OffsetSize: OffsetSizeTrait, { - let values = array.values().clone(); + let values = Arc::clone(array.values()); let offsets = array.value_offsets(); let nulls = array.nulls(); let mut values_hashes = vec![0u64; values.len()]; @@ -274,7 +274,7 @@ fn hash_fixed_list_array( random_state: &RandomState, hashes_buffer: &mut [u64], ) -> Result<()> { - let values = array.values().clone(); + let values = Arc::clone(array.values()); let value_len = array.value_length(); let offset_size = value_len as usize / array.len(); let nulls = array.nulls(); @@ -622,19 +622,19 @@ mod tests { vec![ ( Arc::new(Field::new("bool", DataType::Boolean, false)), - boolarr.clone() as ArrayRef, + Arc::clone(&boolarr) as ArrayRef, ), ( Arc::new(Field::new("i32", DataType::Int32, false)), - i32arr.clone() as ArrayRef, + Arc::clone(&i32arr) as ArrayRef, ), ( Arc::new(Field::new("i32", DataType::Int32, false)), - i32arr.clone() as ArrayRef, + Arc::clone(&i32arr) as ArrayRef, ), ( Arc::new(Field::new("bool", DataType::Boolean, false)), - boolarr.clone() as ArrayRef, + Arc::clone(&boolarr) as ArrayRef, ), ], Buffer::from(&[0b001011]), @@ -710,7 +710,12 @@ mod tests { let random_state = RandomState::with_seeds(0, 0, 0, 0); let mut one_col_hashes = vec![0; strings1.len()]; 
- create_hashes(&[dict_array.clone()], &random_state, &mut one_col_hashes).unwrap(); + create_hashes( + &[Arc::clone(&dict_array) as ArrayRef], + &random_state, + &mut one_col_hashes, + ) + .unwrap(); let mut two_col_hashes = vec![0; strings1.len()]; create_hashes( diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index c275152642f0e..8cd64e7d16a26 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] mod column; mod dfschema; diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 26e03a3b9893e..c8f21788cbbdf 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1758,8 +1758,11 @@ impl ScalarValue { if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type { for array in arrays.iter_mut() { if array.is_null(0) { - *array = - Arc::new(FixedSizeListArray::new_null(f.clone(), l, 1)); + *array = Arc::new(FixedSizeListArray::new_null( + Arc::clone(&f), + l, + 1, + )); } } } @@ -3298,16 +3301,16 @@ impl TryFrom<&DataType> for ScalarValue { ), // `ScalaValue::List` contains single element `ListArray`. DataType::List(field_ref) => ScalarValue::List(Arc::new( - GenericListArray::new_null(field_ref.clone(), 1), + GenericListArray::new_null(Arc::clone(field_ref), 1), )), // `ScalarValue::LargeList` contains single element `LargeListArray`. DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new( - GenericListArray::new_null(field_ref.clone(), 1), + GenericListArray::new_null(Arc::clone(field_ref), 1), )), // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`. 
DataType::FixedSizeList(field_ref, fixed_length) => { ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null( - field_ref.clone(), + Arc::clone(field_ref), *fixed_length, 1, ))) @@ -3746,11 +3749,11 @@ mod tests { let expected = StructArray::from(vec![ ( Arc::new(Field::new("b", DataType::Boolean, false)), - boolean.clone() as ArrayRef, + Arc::clone(&boolean) as ArrayRef, ), ( Arc::new(Field::new("c", DataType::Int32, false)), - int.clone() as ArrayRef, + Arc::clone(&int) as ArrayRef, ), ]); @@ -3792,11 +3795,11 @@ mod tests { let struct_array = StructArray::from(vec![ ( Arc::new(Field::new("b", DataType::Boolean, false)), - boolean.clone() as ArrayRef, + Arc::clone(&boolean) as ArrayRef, ), ( Arc::new(Field::new("c", DataType::Int32, false)), - int.clone() as ArrayRef, + Arc::clone(&int) as ArrayRef, ), ]); let sv = ScalarValue::Struct(Arc::new(struct_array)); @@ -3810,11 +3813,11 @@ mod tests { let struct_array = StructArray::from(vec![ ( Arc::new(Field::new("b", DataType::Boolean, false)), - boolean.clone() as ArrayRef, + Arc::clone(&boolean) as ArrayRef, ), ( Arc::new(Field::new("c", DataType::Int32, false)), - int.clone() as ArrayRef, + Arc::clone(&int) as ArrayRef, ), ]); @@ -3846,7 +3849,7 @@ mod tests { fn test_to_array_of_size_for_fsl() { let values = Int32Array::from_iter([Some(1), None, Some(2)]); let field = Arc::new(Field::new("item", DataType::Int32, true)); - let arr = FixedSizeListArray::new(field.clone(), 3, Arc::new(values), None); + let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None); let sv = ScalarValue::FixedSizeList(Arc::new(arr)); let actual_arr = sv .to_array_of_size(2) @@ -3932,13 +3935,13 @@ mod tests { fn test_iter_to_array_fixed_size_list() { let field = Arc::new(Field::new("item", DataType::Int32, true)); let f1 = Arc::new(FixedSizeListArray::new( - field.clone(), + Arc::clone(&field), 3, Arc::new(Int32Array::from(vec![1, 2, 3])), None, )); let f2 = Arc::new(FixedSizeListArray::new( - field.clone(), + Arc::clone(&field), 3, Arc::new(Int32Array::from(vec![4, 5, 6])), None, @@ -3946,7 +3949,7 @@ mod tests { let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1)); let scalars = vec![ - ScalarValue::FixedSizeList(f_nulls.clone()), + ScalarValue::FixedSizeList(Arc::clone(&f_nulls)), ScalarValue::FixedSizeList(f1), ScalarValue::FixedSizeList(f2), ScalarValue::FixedSizeList(f_nulls), @@ -4780,7 +4783,7 @@ mod tests { let inner_field = Arc::new(Field::new("item", DataType::Int32, true)); // Test for List - let data_type = &DataType::List(inner_field.clone()); + let data_type = &DataType::List(Arc::clone(&inner_field)); let scalar: ScalarValue = data_type.try_into().unwrap(); let expected = ScalarValue::List( new_null_array(data_type, 1) @@ -4792,7 +4795,7 @@ mod tests { assert!(expected.is_null()); // Test for LargeList - let data_type = &DataType::LargeList(inner_field.clone()); + let data_type = &DataType::LargeList(Arc::clone(&inner_field)); let scalar: ScalarValue = data_type.try_into().unwrap(); let expected = ScalarValue::LargeList( new_null_array(data_type, 1) @@ -4804,7 +4807,7 @@ mod tests { assert!(expected.is_null()); // Test for FixedSizeList(5) - let data_type = &DataType::FixedSizeList(inner_field.clone(), 5); + let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5); let scalar: ScalarValue = data_type.try_into().unwrap(); let expected = ScalarValue::FixedSizeList( new_null_array(data_type, 1) @@ -5212,35 +5215,35 @@ mod tests { let field_f = Arc::new(Field::new("f", DataType::Int64, 
false)); let field_d = Arc::new(Field::new( "D", - DataType::Struct(vec![field_e.clone(), field_f.clone()].into()), + DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()), false, )); let struct_array = StructArray::from(vec![ ( - field_e.clone(), + Arc::clone(&field_e), Arc::new(Int16Array::from(vec![2])) as ArrayRef, ), ( - field_f.clone(), + Arc::clone(&field_f), Arc::new(Int64Array::from(vec![3])) as ArrayRef, ), ]); let struct_array = StructArray::from(vec![ ( - field_a.clone(), + Arc::clone(&field_a), Arc::new(Int32Array::from(vec![23])) as ArrayRef, ), ( - field_b.clone(), + Arc::clone(&field_b), Arc::new(BooleanArray::from(vec![false])) as ArrayRef, ), ( - field_c.clone(), + Arc::clone(&field_c), Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef, ), - (field_d.clone(), Arc::new(struct_array) as ArrayRef), + (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef), ]); let scalar = ScalarValue::Struct(Arc::new(struct_array)); @@ -5250,26 +5253,26 @@ mod tests { let expected = Arc::new(StructArray::from(vec![ ( - field_a.clone(), + Arc::clone(&field_a), Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef, ), ( - field_b.clone(), + Arc::clone(&field_b), Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef, ), ( - field_c.clone(), + Arc::clone(&field_c), Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef, ), ( - field_d.clone(), + Arc::clone(&field_d), Arc::new(StructArray::from(vec![ ( - field_e.clone(), + Arc::clone(&field_e), Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef, ), ( - field_f.clone(), + Arc::clone(&field_f), Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef, ), ])) as ArrayRef, @@ -5348,26 +5351,26 @@ mod tests { let expected = Arc::new(StructArray::from(vec![ ( - field_a.clone(), + Arc::clone(&field_a), Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef, ), ( - field_b.clone(), + Arc::clone(&field_b), Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef, ), ( - field_c.clone(), + Arc::clone(&field_c), Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef, ), ( - field_d.clone(), + Arc::clone(&field_d), Arc::new(StructArray::from(vec![ ( - field_e.clone(), + Arc::clone(&field_e), Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef, ), ( - field_f.clone(), + Arc::clone(&field_f), Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef, ), ])) as ArrayRef, @@ -5431,11 +5434,11 @@ mod tests { let array = as_struct_array(&array).unwrap(); let expected = StructArray::from(vec![ ( - field_a.clone(), + Arc::clone(&field_a), Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef, ), ( - field_primitive_list.clone(), + Arc::clone(&field_primitive_list), Arc::new(ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), Some(vec![Some(4), Some(5)]), @@ -6195,18 +6198,18 @@ mod tests { let struct_value = vec![ ( - fields[0].clone(), + Arc::clone(&fields[0]), Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef, ), ( - fields[1].clone(), + Arc::clone(&fields[1]), Arc::new(StructArray::from(vec![ ( - fields_b[0].clone(), + Arc::clone(&fields_b[0]), Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef, ), ( - fields_b[1].clone(), + Arc::clone(&fields_b[1]), Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef, ), ])) as ArrayRef, @@ -6215,19 +6218,19 @@ mod tests { let struct_value_with_nulls = vec![ ( - fields[0].clone(), + Arc::clone(&fields[0]), Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef, ), ( - fields[1].clone(), + 
Arc::clone(&fields[1]), Arc::new(StructArray::from(( vec![ ( - fields_b[0].clone(), + Arc::clone(&fields_b[0]), Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef, ), ( - fields_b[1].clone(), + Arc::clone(&fields_b[1]), Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef, ), ], diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs index b1a34e4a61d01..4a6a8f0289a7d 100644 --- a/datafusion/common/src/scalar/struct_builder.rs +++ b/datafusion/common/src/scalar/struct_builder.rs @@ -144,7 +144,7 @@ impl IntoFieldRef for FieldRef { impl IntoFieldRef for &FieldRef { fn into_field_ref(self) -> FieldRef { - self.clone() + Arc::clone(self) } } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index dd7b80333cf81..8264b48725929 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -245,7 +245,10 @@ pub fn evaluate_partition_ranges( end: num_rows, }] } else { - let cols: Vec<_> = partition_columns.iter().map(|x| x.values.clone()).collect(); + let cols: Vec<_> = partition_columns + .iter() + .map(|x| Arc::clone(&x.values)) + .collect(); partition(&cols)?.ranges() }) } From 6038f4cfac536dbb54ea2761828f7344a23b94f0 Mon Sep 17 00:00:00 2001 From: wiedld Date: Wed, 10 Jul 2024 11:21:01 -0700 Subject: [PATCH 016/357] Track parquet writer encoding memory usage on MemoryPool (#11345) * feat(11344): track memory used for non-parallel writes * feat(11344): track memory usage during parallel writes * test(11344): create bounded stream for testing * test(11344): test ParquetSink memory reservation * feat(11344): track bytes in file writer * refactor(11344): tweak the ordering to add col bytes to rg_reservation, before selecting shrinking for data bytes flushed * refactor: move each col_reservation and rg_reservation to match the parallelized call stack for col vs rg * test(11344): add memory_limit enforcement test for parquet sink * chore: cleanup to remove unnecessary reservation management steps * fix: fix CI test failure due to file extension rename --- .../src/datasource/file_format/parquet.rs | 165 ++++++++++++++++-- datafusion/core/src/test_util/mod.rs | 36 ++++ datafusion/core/tests/memory_limit/mod.rs | 25 +++ 3 files changed, 216 insertions(+), 10 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 27d783cd89b5f..694c949285374 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -48,6 +48,7 @@ use datafusion_common::{ DEFAULT_PARQUET_EXTENSION, }; use datafusion_common_runtime::SpawnedTask; +use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_physical_expr::expressions::{MaxAccumulator, MinAccumulator}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; @@ -749,9 +750,13 @@ impl DataSink for ParquetSink { parquet_props.writer_options().clone(), ) .await?; + let mut reservation = + MemoryConsumer::new(format!("ParquetSink[{}]", path)) + .register(context.memory_pool()); file_write_tasks.spawn(async move { while let Some(batch) = rx.recv().await { writer.write(&batch).await?; + reservation.try_resize(writer.memory_size())?; } let file_metadata = writer .close() @@ -771,6 +776,7 @@ impl DataSink for ParquetSink { let schema = self.get_writer_schema(); let props = parquet_props.clone(); let 
parallel_options_clone = parallel_options.clone(); + let pool = Arc::clone(context.memory_pool()); file_write_tasks.spawn(async move { let file_metadata = output_single_parquet_file_parallelized( writer, @@ -778,6 +784,7 @@ impl DataSink for ParquetSink { schema, props.writer_options(), parallel_options_clone, + pool, ) .await?; Ok((path, file_metadata)) @@ -818,14 +825,16 @@ impl DataSink for ParquetSink { async fn column_serializer_task( mut rx: Receiver, mut writer: ArrowColumnWriter, -) -> Result { + mut reservation: MemoryReservation, +) -> Result<(ArrowColumnWriter, MemoryReservation)> { while let Some(col) = rx.recv().await { writer.write(&col)?; + reservation.try_resize(writer.memory_size())?; } - Ok(writer) + Ok((writer, reservation)) } -type ColumnWriterTask = SpawnedTask>; +type ColumnWriterTask = SpawnedTask>; type ColSender = Sender; /// Spawns a parallel serialization task for each column @@ -835,6 +844,7 @@ fn spawn_column_parallel_row_group_writer( schema: Arc, parquet_props: Arc, max_buffer_size: usize, + pool: &Arc, ) -> Result<(Vec, Vec)> { let schema_desc = arrow_to_parquet_schema(&schema)?; let col_writers = get_column_writers(&schema_desc, &parquet_props, &schema)?; @@ -848,7 +858,13 @@ fn spawn_column_parallel_row_group_writer( mpsc::channel::(max_buffer_size); col_array_channels.push(send_array); - let task = SpawnedTask::spawn(column_serializer_task(recieve_array, writer)); + let reservation = + MemoryConsumer::new("ParquetSink(ArrowColumnWriter)").register(pool); + let task = SpawnedTask::spawn(column_serializer_task( + recieve_array, + writer, + reservation, + )); col_writer_tasks.push(task); } @@ -864,7 +880,7 @@ struct ParallelParquetWriterOptions { /// This is the return type of calling [ArrowColumnWriter].close() on each column /// i.e. the Vec of encoded columns which can be appended to a row group -type RBStreamSerializeResult = Result<(Vec, usize)>; +type RBStreamSerializeResult = Result<(Vec, MemoryReservation, usize)>; /// Sends the ArrowArrays in passed [RecordBatch] through the channels to their respective /// parallel column serializers. 
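The hunks above register one `MemoryConsumer` per parallel column writer and then call `reservation.try_resize(writer.memory_size())` after every write, so the shared pool always reflects the bytes currently buffered by the Arrow encoders. The resize-to-current-size pattern can be illustrated with a small, self-contained model; `ToyPool` and `ToyReservation` below are invented for this sketch and are not DataFusion's `MemoryPool` or `MemoryReservation` types:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

struct ToyPool {
    limit: usize,
    used: AtomicUsize,
}

struct ToyReservation {
    pool: Arc<ToyPool>,
    size: usize,
}

impl ToyReservation {
    // Grow or shrink this reservation to `new_size`, failing when the shared
    // limit would be exceeded (the contract the patch relies on for try_resize).
    fn try_resize(&mut self, new_size: usize) -> Result<(), String> {
        if new_size > self.size {
            let grow = new_size - self.size;
            let prev = self.pool.used.fetch_add(grow, Ordering::SeqCst);
            if prev + grow > self.pool.limit {
                self.pool.used.fetch_sub(grow, Ordering::SeqCst);
                return Err(format!("cannot grow reservation by {grow} bytes"));
            }
        } else {
            self.pool.used.fetch_sub(self.size - new_size, Ordering::SeqCst);
        }
        self.size = new_size;
        Ok(())
    }
}

fn main() -> Result<(), String> {
    let pool = Arc::new(ToyPool { limit: 1024, used: AtomicUsize::new(0) });
    let mut reservation = ToyReservation { pool: Arc::clone(&pool), size: 0 };

    // The writer's in-memory buffer grows as batches arrive, then drops back
    // to zero once the encoded bytes have been flushed downstream.
    for buffered in [100usize, 350, 700, 0] {
        reservation.try_resize(buffered)?;
    }
    assert_eq!(pool.used.load(Ordering::SeqCst), 0);
    Ok(())
}

Resizing to the writer's absolute `memory_size()` rather than adding per-batch deltas keeps the accounting correct even if the encoder frees or compacts internal buffers between writes, which seems to be why the patch reports the current size after each write instead of accumulating increments.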
@@ -895,16 +911,22 @@ async fn send_arrays_to_col_writers( fn spawn_rg_join_and_finalize_task( column_writer_tasks: Vec, rg_rows: usize, + pool: &Arc, ) -> SpawnedTask { + let mut rg_reservation = + MemoryConsumer::new("ParquetSink(SerializedRowGroupWriter)").register(pool); + SpawnedTask::spawn(async move { let num_cols = column_writer_tasks.len(); let mut finalized_rg = Vec::with_capacity(num_cols); for task in column_writer_tasks.into_iter() { - let writer = task.join_unwind().await?; + let (writer, _col_reservation) = task.join_unwind().await?; + let encoded_size = writer.get_estimated_total_bytes(); + rg_reservation.grow(encoded_size); finalized_rg.push(writer.close()?); } - Ok((finalized_rg, rg_rows)) + Ok((finalized_rg, rg_reservation, rg_rows)) }) } @@ -922,6 +944,7 @@ fn spawn_parquet_parallel_serialization_task( schema: Arc, writer_props: Arc, parallel_options: ParallelParquetWriterOptions, + pool: Arc, ) -> SpawnedTask> { SpawnedTask::spawn(async move { let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream; @@ -931,6 +954,7 @@ fn spawn_parquet_parallel_serialization_task( schema.clone(), writer_props.clone(), max_buffer_rb, + &pool, )?; let mut current_rg_rows = 0; @@ -957,6 +981,7 @@ fn spawn_parquet_parallel_serialization_task( let finalize_rg_task = spawn_rg_join_and_finalize_task( column_writer_handles, max_row_group_rows, + &pool, ); serialize_tx.send(finalize_rg_task).await.map_err(|_| { @@ -973,6 +998,7 @@ fn spawn_parquet_parallel_serialization_task( schema.clone(), writer_props.clone(), max_buffer_rb, + &pool, )?; } } @@ -981,8 +1007,11 @@ fn spawn_parquet_parallel_serialization_task( drop(col_array_channels); // Handle leftover rows as final rowgroup, which may be smaller than max_row_group_rows if current_rg_rows > 0 { - let finalize_rg_task = - spawn_rg_join_and_finalize_task(column_writer_handles, current_rg_rows); + let finalize_rg_task = spawn_rg_join_and_finalize_task( + column_writer_handles, + current_rg_rows, + &pool, + ); serialize_tx.send(finalize_rg_task).await.map_err(|_| { DataFusionError::Internal( @@ -1002,9 +1031,13 @@ async fn concatenate_parallel_row_groups( schema: Arc, writer_props: Arc, mut object_store_writer: Box, + pool: Arc, ) -> Result { let merged_buff = SharedBuffer::new(INITIAL_BUFFER_BYTES); + let mut file_reservation = + MemoryConsumer::new("ParquetSink(SerializedFileWriter)").register(&pool); + let schema_desc = arrow_to_parquet_schema(schema.as_ref())?; let mut parquet_writer = SerializedFileWriter::new( merged_buff.clone(), @@ -1015,15 +1048,20 @@ async fn concatenate_parallel_row_groups( while let Some(task) = serialize_rx.recv().await { let result = task.join_unwind().await; let mut rg_out = parquet_writer.next_row_group()?; - let (serialized_columns, _cnt) = result?; + let (serialized_columns, mut rg_reservation, _cnt) = result?; for chunk in serialized_columns { chunk.append_to_row_group(&mut rg_out)?; + rg_reservation.free(); + let mut buff_to_flush = merged_buff.buffer.try_lock().unwrap(); + file_reservation.try_resize(buff_to_flush.len())?; + if buff_to_flush.len() > BUFFER_FLUSH_BYTES { object_store_writer .write_all(buff_to_flush.as_slice()) .await?; buff_to_flush.clear(); + file_reservation.try_resize(buff_to_flush.len())?; // will set to zero } } rg_out.close()?; @@ -1034,6 +1072,7 @@ async fn concatenate_parallel_row_groups( object_store_writer.write_all(final_buff.as_slice()).await?; object_store_writer.shutdown().await?; + file_reservation.free(); Ok(file_metadata) } @@ -1048,6 +1087,7 @@ async fn 
output_single_parquet_file_parallelized( output_schema: Arc, parquet_props: &WriterProperties, parallel_options: ParallelParquetWriterOptions, + pool: Arc, ) -> Result { let max_rowgroups = parallel_options.max_parallel_row_groups; // Buffer size of this channel limits maximum number of RowGroups being worked on in parallel @@ -1061,12 +1101,14 @@ async fn output_single_parquet_file_parallelized( output_schema.clone(), arc_props.clone(), parallel_options, + Arc::clone(&pool), ); let file_metadata = concatenate_parallel_row_groups( serialize_rx, output_schema.clone(), arc_props.clone(), object_store_writer, + pool, ) .await?; @@ -1158,8 +1200,10 @@ mod tests { use super::super::test_util::scan_format; use crate::datasource::listing::{ListingTableUrl, PartitionedFile}; use crate::physical_plan::collect; + use crate::test_util::bounded_stream; use std::fmt::{Display, Formatter}; use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration; use super::*; @@ -2177,4 +2221,105 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn parquet_sink_write_memory_reservation() -> Result<()> { + async fn test_memory_reservation(global: ParquetOptions) -> Result<()> { + let field_a = Field::new("a", DataType::Utf8, false); + let field_b = Field::new("b", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let object_store_url = ObjectStoreUrl::local_filesystem(); + + let file_sink_config = FileSinkConfig { + object_store_url: object_store_url.clone(), + file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], + table_paths: vec![ListingTableUrl::parse("file:///")?], + output_schema: schema.clone(), + table_partition_cols: vec![], + overwrite: true, + keep_partition_by_columns: false, + }; + let parquet_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions { + key_value_metadata: std::collections::HashMap::from([ + ("my-data".to_string(), Some("stuff".to_string())), + ("my-data-bool-key".to_string(), None), + ]), + global, + ..Default::default() + }, + )); + + // create data + let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); + let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); + let batch = + RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); + + // create task context + let task_context = build_ctx(object_store_url.as_ref()); + assert_eq!( + task_context.memory_pool().reserved(), + 0, + "no bytes are reserved yet" + ); + + let mut write_task = parquet_sink.write_all( + Box::pin(RecordBatchStreamAdapter::new( + schema, + bounded_stream(batch, 1000), + )), + &task_context, + ); + + // incrementally poll and check for memory reservation + let mut reserved_bytes = 0; + while futures::poll!(&mut write_task).is_pending() { + reserved_bytes += task_context.memory_pool().reserved(); + tokio::time::sleep(Duration::from_micros(1)).await; + } + assert!( + reserved_bytes > 0, + "should have bytes reserved during write" + ); + assert_eq!( + task_context.memory_pool().reserved(), + 0, + "no leaking byte reservation" + ); + + Ok(()) + } + + let write_opts = ParquetOptions { + allow_single_file_parallelism: false, + ..Default::default() + }; + test_memory_reservation(write_opts) + .await + .expect("should track for non-parallel writes"); + + let row_parallel_write_opts = ParquetOptions { + allow_single_file_parallelism: true, + maximum_parallel_row_group_writers: 10, + maximum_buffered_record_batches_per_stream: 1, + ..Default::default() + }; + 
test_memory_reservation(row_parallel_write_opts) + .await + .expect("should track for row-parallel writes"); + + let col_parallel_write_opts = ParquetOptions { + allow_single_file_parallelism: true, + maximum_parallel_row_group_writers: 1, + maximum_buffered_record_batches_per_stream: 2, + ..Default::default() + }; + test_memory_reservation(col_parallel_write_opts) + .await + .expect("should track for column-parallel writes"); + + Ok(()) + } } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 059fa8fc6da77..ba0509f3f51ac 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -366,3 +366,39 @@ pub fn register_unbounded_file_with_ordering( ctx.register_table(table_name, Arc::new(StreamTable::new(Arc::new(config))))?; Ok(()) } + +struct BoundedStream { + limit: usize, + count: usize, + batch: RecordBatch, +} + +impl Stream for BoundedStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + if self.count >= self.limit { + return Poll::Ready(None); + } + self.count += 1; + Poll::Ready(Some(Ok(self.batch.clone()))) + } +} + +impl RecordBatchStream for BoundedStream { + fn schema(&self) -> SchemaRef { + self.batch.schema() + } +} + +/// Creates an bounded stream for testing purposes. +pub fn bounded_stream(batch: RecordBatch, limit: usize) -> SendableRecordBatchStream { + Box::pin(BoundedStream { + count: 0, + limit, + batch, + }) +} diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index f61ee5d9ab984..f7402357d1c76 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -31,6 +31,7 @@ use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; use futures::StreamExt; use std::any::Any; use std::sync::{Arc, OnceLock}; +use tokio::fs::File; use datafusion::datasource::streaming::StreamingTable; use datafusion::datasource::{MemTable, TableProvider}; @@ -323,6 +324,30 @@ async fn oom_recursive_cte() { .await } +#[tokio::test] +async fn oom_parquet_sink() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.into_path().join("test.parquet"); + let _ = File::create(path.clone()).await.unwrap(); + + TestCase::new() + .with_query(format!( + " + COPY (select * from t) + TO '{}' + STORED AS PARQUET OPTIONS (compression 'uncompressed'); + ", + path.to_string_lossy() + )) + .with_expected_errors(vec![ + // TODO: update error handling in ParquetSink + "Unable to send array to writer!", + ]) + .with_memory_limit(200_000) + .run() + .await +} + /// Run the query with the specified memory limit, /// and verifies the expected errors are returned #[derive(Clone, Debug)] From 32cb3c5a54bd0297d473792c8a3b0e7fd51c2e3b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Jul 2024 14:21:44 -0400 Subject: [PATCH 017/357] Minor: remove clones and unnecessary Arcs in `from_substrait_rex` (#11337) --- .../substrait/src/logical_plan/consumer.rs | 146 +++++++----------- 1 file changed, 59 insertions(+), 87 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 89a6dde51e42c..a4f7242024754 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -411,11 +411,11 @@ pub async fn from_substrait_rel( from_substrait_rex(ctx, e, input.clone().schema(), extensions) .await?; // if the expression is WindowFunction, wrap in a Window 
relation - if let Expr::WindowFunction(_) = x.as_ref() { + if let Expr::WindowFunction(_) = &x { // Adding the same expression here and in the project below // works because the project's builder uses columnize_expr(..) // to transform it into a column reference - input = input.window(vec![x.as_ref().clone()])? + input = input.window(vec![x.clone()])? } // Ensure the expression has a unique display name, so that project's // validate_unique_names doesn't fail @@ -426,12 +426,12 @@ pub async fn from_substrait_rel( new_name = format!("{}__temp__{}", name, i); i += 1; } - names.insert(new_name.clone()); if new_name != name { - exprs.push(x.as_ref().clone().alias(new_name.clone())); + exprs.push(x.alias(new_name.clone())); } else { - exprs.push(x.as_ref().clone()); + exprs.push(x); } + names.insert(new_name); } input.project(exprs)?.build() } else { @@ -447,7 +447,7 @@ pub async fn from_substrait_rel( let expr = from_substrait_rex(ctx, condition, input.schema(), extensions) .await?; - input.filter(expr.as_ref().clone())?.build() + input.filter(expr)?.build() } else { not_impl_err!("Filter without an condition is not valid") } @@ -499,7 +499,7 @@ pub async fn from_substrait_rel( let x = from_substrait_rex(ctx, e, input.schema(), extensions) .await?; - group_expr.push(x.as_ref().clone()); + group_expr.push(x); } } _ => { @@ -514,7 +514,7 @@ pub async fn from_substrait_rel( extensions, ) .await?; - grouping_set.push(x.as_ref().clone()); + grouping_set.push(x); } grouping_sets.push(grouping_set); } @@ -532,9 +532,7 @@ pub async fn from_substrait_rel( let filter = match &m.filter { Some(fil) => Some(Box::new( from_substrait_rex(ctx, fil, input.schema(), extensions) - .await? - .as_ref() - .clone(), + .await?, )), None => None, }; @@ -931,7 +929,7 @@ pub async fn from_substrait_sorts( }; let (asc, nulls_first) = asc_nullfirst.unwrap(); sorts.push(Expr::Sort(Sort { - expr: Box::new(expr.as_ref().clone()), + expr: Box::new(expr), asc, nulls_first, })); @@ -949,7 +947,7 @@ pub async fn from_substrait_rex_vec( let mut expressions: Vec = vec![]; for expr in exprs { let expression = from_substrait_rex(ctx, expr, input_schema, extensions).await?; - expressions.push(expression.as_ref().clone()); + expressions.push(expression); } Ok(expressions) } @@ -969,7 +967,7 @@ pub async fn from_substrait_func_args( } _ => not_impl_err!("Function argument non-Value type not supported"), }; - args.push(arg_expr?.as_ref().clone()); + args.push(arg_expr?); } Ok(args) } @@ -1028,17 +1026,15 @@ pub async fn from_substrait_rex( e: &Expression, input_schema: &DFSchema, extensions: &HashMap, -) -> Result> { +) -> Result { match &e.rex_type { Some(RexType::SingularOrList(s)) => { let substrait_expr = s.value.as_ref().unwrap(); let substrait_list = s.options.as_ref(); - Ok(Arc::new(Expr::InList(InList { + Ok(Expr::InList(InList { expr: Box::new( from_substrait_rex(ctx, substrait_expr, input_schema, extensions) - .await? - .as_ref() - .clone(), + .await?, ), list: from_substrait_rex_vec( ctx, @@ -1048,11 +1044,11 @@ pub async fn from_substrait_rex( ) .await?, negated: false, - }))) + })) + } + Some(RexType::Selection(field_ref)) => { + Ok(from_substrait_field_reference(field_ref, input_schema)?) 
} - Some(RexType::Selection(field_ref)) => Ok(Arc::new( - from_substrait_field_reference(field_ref, input_schema)?, - )), Some(RexType::IfThen(if_then)) => { // Parse `ifs` // If the first element does not have a `then` part, then we can assume it's a base expression @@ -1069,9 +1065,7 @@ pub async fn from_substrait_rex( input_schema, extensions, ) - .await? - .as_ref() - .clone(), + .await?, )); continue; } @@ -1084,9 +1078,7 @@ pub async fn from_substrait_rex( input_schema, extensions, ) - .await? - .as_ref() - .clone(), + .await?, ), Box::new( from_substrait_rex( @@ -1095,27 +1087,22 @@ pub async fn from_substrait_rex( input_schema, extensions, ) - .await? - .as_ref() - .clone(), + .await?, ), )); } // Parse `else` let else_expr = match &if_then.r#else { Some(e) => Some(Box::new( - from_substrait_rex(ctx, e, input_schema, extensions) - .await? - .as_ref() - .clone(), + from_substrait_rex(ctx, e, input_schema, extensions).await?, )), None => None, }; - Ok(Arc::new(Expr::Case(Case { + Ok(Expr::Case(Case { expr, when_then_expr, else_expr, - }))) + })) } Some(RexType::ScalarFunction(f)) => { let Some(fn_name) = extensions.get(&f.function_reference) else { @@ -1133,8 +1120,9 @@ pub async fn from_substrait_rex( // try to first match the requested function into registered udfs, then built-in ops // and finally built-in expressions if let Some(func) = ctx.state().scalar_functions().get(fn_name) { - Ok(Arc::new(Expr::ScalarFunction( - expr::ScalarFunction::new_udf(func.to_owned(), args), + Ok(Expr::ScalarFunction(expr::ScalarFunction::new_udf( + func.to_owned(), + args, ))) } else if let Some(op) = name_to_op(fn_name) { if f.arguments.len() < 2 { @@ -1147,17 +1135,14 @@ pub async fn from_substrait_rex( // In those cases we iterate through all the arguments, applying the binary expression against them all let combined_expr = args .into_iter() - .fold(None, |combined_expr: Option>, arg: Expr| { + .fold(None, |combined_expr: Option, arg: Expr| { Some(match combined_expr { - Some(expr) => Arc::new(Expr::BinaryExpr(BinaryExpr { - left: Box::new( - Arc::try_unwrap(expr) - .unwrap_or_else(|arc: Arc| (*arc).clone()), - ), // Avoid cloning if possible + Some(expr) => Expr::BinaryExpr(BinaryExpr { + left: Box::new(expr), op, right: Box::new(arg), - })), - None => Arc::new(arg), + }), + None => arg, }) }) .unwrap(); @@ -1171,10 +1156,10 @@ pub async fn from_substrait_rex( } Some(RexType::Literal(lit)) => { let scalar_value = from_substrait_literal_without_names(lit)?; - Ok(Arc::new(Expr::Literal(scalar_value))) + Ok(Expr::Literal(scalar_value)) } Some(RexType::Cast(cast)) => match cast.as_ref().r#type.as_ref() { - Some(output_type) => Ok(Arc::new(Expr::Cast(Cast::new( + Some(output_type) => Ok(Expr::Cast(Cast::new( Box::new( from_substrait_rex( ctx, @@ -1182,12 +1167,10 @@ pub async fn from_substrait_rex( input_schema, extensions, ) - .await? 
- .as_ref() - .clone(), + .await?, ), from_substrait_type_without_names(output_type)?, - )))), + ))), None => substrait_err!("Cast expression without output type is not allowed"), }, Some(RexType::WindowFunction(window)) => { @@ -1232,7 +1215,7 @@ pub async fn from_substrait_rex( } } }; - Ok(Arc::new(Expr::WindowFunction(expr::WindowFunction { + Ok(Expr::WindowFunction(expr::WindowFunction { fun, args: from_substrait_func_args( ctx, @@ -1255,7 +1238,7 @@ pub async fn from_substrait_rex( from_substrait_bound(&window.upper_bound, false)?, ), null_treatment: None, - }))) + })) } Some(RexType::Subquery(subquery)) => match &subquery.as_ref().subquery_type { Some(subquery_type) => match subquery_type { @@ -1270,7 +1253,7 @@ pub async fn from_substrait_rex( from_substrait_rel(ctx, haystack_expr, extensions) .await?; let outer_refs = haystack_expr.all_out_ref_exprs(); - Ok(Arc::new(Expr::InSubquery(InSubquery { + Ok(Expr::InSubquery(InSubquery { expr: Box::new( from_substrait_rex( ctx, @@ -1278,16 +1261,14 @@ pub async fn from_substrait_rex( input_schema, extensions, ) - .await? - .as_ref() - .clone(), + .await?, ), subquery: Subquery { subquery: Arc::new(haystack_expr), outer_ref_columns: outer_refs, }, negated: false, - }))) + })) } else { substrait_err!("InPredicate Subquery type must have a Haystack expression") } @@ -1301,10 +1282,10 @@ pub async fn from_substrait_rex( ) .await?; let outer_ref_columns = plan.all_out_ref_exprs(); - Ok(Arc::new(Expr::ScalarSubquery(Subquery { + Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(plan), outer_ref_columns, - }))) + })) } SubqueryType::SetPredicate(predicate) => { match predicate.predicate_op() { @@ -1318,13 +1299,13 @@ pub async fn from_substrait_rex( ) .await?; let outer_ref_columns = plan.all_out_ref_exprs(); - Ok(Arc::new(Expr::Exists(Exists::new( + Ok(Expr::Exists(Exists::new( Subquery { subquery: Arc::new(plan), outer_ref_columns, }, false, - )))) + ))) } other_type => substrait_err!( "unimplemented type {:?} for set predicate", @@ -1337,7 +1318,7 @@ pub async fn from_substrait_rex( } }, None => { - substrait_err!("Subquery experssion without SubqueryType is not allowed") + substrait_err!("Subquery expression without SubqueryType is not allowed") } }, _ => not_impl_err!("unsupported rex_type"), @@ -2001,7 +1982,7 @@ impl BuiltinExprBuilder { f: &ScalarFunction, input_schema: &DFSchema, extensions: &HashMap, - ) -> Result> { + ) -> Result { match self.expr_name.as_str() { "like" => { Self::build_like_expr(ctx, false, f, input_schema, extensions).await @@ -2026,17 +2007,15 @@ impl BuiltinExprBuilder { f: &ScalarFunction, input_schema: &DFSchema, extensions: &HashMap, - ) -> Result> { + ) -> Result { if f.arguments.len() != 1 { return substrait_err!("Expect one argument for {fn_name} expr"); } let Some(ArgType::Value(expr_substrait)) = &f.arguments[0].arg_type else { return substrait_err!("Invalid arguments type for {fn_name} expr"); }; - let arg = from_substrait_rex(ctx, expr_substrait, input_schema, extensions) - .await? 
- .as_ref() - .clone(); + let arg = + from_substrait_rex(ctx, expr_substrait, input_schema, extensions).await?; let arg = Box::new(arg); let expr = match fn_name { @@ -2053,7 +2032,7 @@ impl BuiltinExprBuilder { _ => return not_impl_err!("Unsupported builtin expression: {}", fn_name), }; - Ok(Arc::new(expr)) + Ok(expr) } async fn build_like_expr( @@ -2062,7 +2041,7 @@ impl BuiltinExprBuilder { f: &ScalarFunction, input_schema: &DFSchema, extensions: &HashMap, - ) -> Result> { + ) -> Result { let fn_name = if case_insensitive { "ILIKE" } else { "LIKE" }; if f.arguments.len() != 2 && f.arguments.len() != 3 { return substrait_err!("Expect two or three arguments for `{fn_name}` expr"); @@ -2071,18 +2050,13 @@ impl BuiltinExprBuilder { let Some(ArgType::Value(expr_substrait)) = &f.arguments[0].arg_type else { return substrait_err!("Invalid arguments type for `{fn_name}` expr"); }; - let expr = from_substrait_rex(ctx, expr_substrait, input_schema, extensions) - .await? - .as_ref() - .clone(); + let expr = + from_substrait_rex(ctx, expr_substrait, input_schema, extensions).await?; let Some(ArgType::Value(pattern_substrait)) = &f.arguments[1].arg_type else { return substrait_err!("Invalid arguments type for `{fn_name}` expr"); }; let pattern = - from_substrait_rex(ctx, pattern_substrait, input_schema, extensions) - .await? - .as_ref() - .clone(); + from_substrait_rex(ctx, pattern_substrait, input_schema, extensions).await?; // Default case: escape character is Literal(Utf8(None)) let escape_char = if f.arguments.len() == 3 { @@ -2093,9 +2067,7 @@ impl BuiltinExprBuilder { let escape_char_expr = from_substrait_rex(ctx, escape_char_substrait, input_schema, extensions) - .await? - .as_ref() - .clone(); + .await?; match escape_char_expr { Expr::Literal(ScalarValue::Utf8(escape_char_string)) => { @@ -2112,12 +2084,12 @@ impl BuiltinExprBuilder { None }; - Ok(Arc::new(Expr::Like(Like { + Ok(Expr::Like(Like { negated: false, expr: Box::new(expr), pattern: Box::new(pattern), escape_char, case_insensitive, - }))) + })) } } From cc7484e0b73fe0b36e5f76741399c95e5e7ff1c7 Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:11:48 -0600 Subject: [PATCH 018/357] Minor: Change no-statement error message to be clearer (#11394) * Change no-statement error message to be clearer and add tests for said change * Run fmt to pass CI --- .../core/src/execution/session_state.rs | 2 +- datafusion/core/tests/sql/sql_api.rs | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index c123ebb22ecb2..60745076c2427 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -555,7 +555,7 @@ impl SessionState { } let statement = statements.pop_front().ok_or_else(|| { DataFusionError::NotImplemented( - "The context requires a statement!".to_string(), + "No SQL statements were provided in the query string".to_string(), ) })?; Ok(statement) diff --git a/datafusion/core/tests/sql/sql_api.rs b/datafusion/core/tests/sql/sql_api.rs index 4a6424fc24b62..e7c40d2c8aa88 100644 --- a/datafusion/core/tests/sql/sql_api.rs +++ b/datafusion/core/tests/sql/sql_api.rs @@ -113,6 +113,40 @@ async fn unsupported_statement_returns_error() { ctx.sql_with_options(sql, options).await.unwrap(); } +#[tokio::test] +async fn empty_statement_returns_error() { + let ctx = SessionContext::new(); + ctx.sql("CREATE TABLE test 
(x int)").await.unwrap(); + + let state = ctx.state(); + + // Give it an empty string which contains no statements + let plan_res = state.create_logical_plan("").await; + assert_eq!( + plan_res.unwrap_err().strip_backtrace(), + "This feature is not implemented: No SQL statements were provided in the query string" + ); +} + +#[tokio::test] +async fn multiple_statements_returns_error() { + let ctx = SessionContext::new(); + ctx.sql("CREATE TABLE test (x int)").await.unwrap(); + + let state = ctx.state(); + + // Give it a string that contains multiple statements + let plan_res = state + .create_logical_plan( + "INSERT INTO test (x) VALUES (1); INSERT INTO test (x) VALUES (2)", + ) + .await; + assert_eq!( + plan_res.unwrap_err().strip_backtrace(), + "This feature is not implemented: The context currently only supports a single SQL statement" + ); +} + #[tokio::test] async fn ddl_can_not_be_planned_by_session_state() { let ctx = SessionContext::new(); From d3f63728d222cc5cf30cf03a12ec9a0b41399b18 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Thu, 11 Jul 2024 07:32:03 +0800 Subject: [PATCH 019/357] Change `array_agg` to return `null` on no input rather than empty list (#11299) * change array agg semantic for empty result Signed-off-by: jayzhan211 * return null Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * fix order sensitive Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * add more test Signed-off-by: jayzhan211 * fix null Signed-off-by: jayzhan211 * fix multi-phase case Signed-off-by: jayzhan211 * add comment Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * fix clone Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/common/src/scalar/mod.rs | 10 ++ datafusion/core/tests/dataframe/mod.rs | 2 +- datafusion/core/tests/sql/aggregates.rs | 2 +- datafusion/expr/src/aggregate_function.rs | 2 +- .../physical-expr/src/aggregate/array_agg.rs | 17 +- .../src/aggregate/array_agg_distinct.rs | 11 +- .../src/aggregate/array_agg_ordered.rs | 12 +- .../physical-expr/src/aggregate/build_in.rs | 4 +- .../sqllogictest/test_files/aggregate.slt | 155 +++++++++++++----- 9 files changed, 161 insertions(+), 54 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index c8f21788cbbdf..6c03e8698e80b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1984,6 +1984,16 @@ impl ScalarValue { Self::new_list(values, data_type, true) } + /// Create ListArray with Null with specific data type + /// + /// - new_null_list(i32, nullable, 1): `ListArray[NULL]` + pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self { + let data_type = DataType::List(Field::new_list_field(data_type, nullable).into()); + Self::List(Arc::new(ListArray::from(ArrayData::new_null( + &data_type, null_len, + )))) + } + /// Converts `IntoIterator` where each element has type corresponding to /// `data_type`, to a [`ListArray`]. 
/// diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 2d1904d9e1667..f1d57c44293be 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1388,7 +1388,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { let expected = vec![ "Projection: shapes.shape_id [shape_id:UInt32]", " Unnest: lists[shape_id2] structs[] [shape_id:UInt32, shape_id2:UInt32;N]", - " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[ARRAY_AGG(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} })]", + " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[ARRAY_AGG(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} });N]", " TableScan: shapes projection=[shape_id] [shape_id:UInt32]", ]; diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index e503b74992c3f..86032dc9bc963 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -37,7 +37,7 @@ async fn csv_query_array_agg_distinct() -> Result<()> { Schema::new(vec![Field::new_list( "ARRAY_AGG(DISTINCT aggregate_test_100.c2)", Field::new("item", DataType::UInt32, false), - false + true ),]) ); diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index 23e98714dfa4c..3cae78eaed9b6 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -118,7 +118,7 @@ impl AggregateFunction { pub fn nullable(&self) -> Result { match self { AggregateFunction::Max | AggregateFunction::Min => Ok(true), - AggregateFunction::ArrayAgg => Ok(false), + AggregateFunction::ArrayAgg => Ok(true), } } } diff --git a/datafusion/physical-expr/src/aggregate/array_agg.rs b/datafusion/physical-expr/src/aggregate/array_agg.rs index 634a0a0179037..38a9738029335 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg.rs @@ -71,7 +71,7 @@ impl AggregateExpr for ArrayAgg { &self.name, // This should be the same as return type of AggregateFunction::ArrayAgg Field::new("item", self.input_data_type.clone(), self.nullable), - false, + true, )) } @@ -86,7 +86,7 @@ impl AggregateExpr for ArrayAgg { Ok(vec![Field::new_list( format_state_name(&self.name, "array_agg"), Field::new("item", self.input_data_type.clone(), self.nullable), - false, + true, )]) } @@ -137,8 +137,11 @@ impl Accumulator for ArrayAggAccumulator { return Ok(()); } assert!(values.len() == 1, "array_agg can only take 1 param!"); + let val = Arc::clone(&values[0]); - self.values.push(val); + if val.len() > 0 { + self.values.push(val); + } Ok(()) } @@ -162,13 +165,15 @@ impl Accumulator for ArrayAggAccumulator { fn evaluate(&mut self) -> Result { // Transform Vec to ListArr - let element_arrays: Vec<&dyn Array> = self.values.iter().map(|a| a.as_ref()).collect(); if element_arrays.is_empty() { - let arr = ScalarValue::new_list(&[], &self.datatype, self.nullable); - return Ok(ScalarValue::List(arr)); + return Ok(ScalarValue::new_null_list( + self.datatype.clone(), + self.nullable, + 1, + )); } let concated_array = arrow::compute::concat(&element_arrays)?; diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs 
b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs index a59d85e84a203..368d11d7421ab 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs @@ -75,7 +75,7 @@ impl AggregateExpr for DistinctArrayAgg { &self.name, // This should be the same as return type of AggregateFunction::ArrayAgg Field::new("item", self.input_data_type.clone(), self.nullable), - false, + true, )) } @@ -90,7 +90,7 @@ impl AggregateExpr for DistinctArrayAgg { Ok(vec![Field::new_list( format_state_name(&self.name, "distinct_array_agg"), Field::new("item", self.input_data_type.clone(), self.nullable), - false, + true, )]) } @@ -165,6 +165,13 @@ impl Accumulator for DistinctArrayAggAccumulator { fn evaluate(&mut self) -> Result { let values: Vec = self.values.iter().cloned().collect(); + if values.is_empty() { + return Ok(ScalarValue::new_null_list( + self.datatype.clone(), + self.nullable, + 1, + )); + } let arr = ScalarValue::new_list(&values, &self.datatype, self.nullable); Ok(ScalarValue::List(arr)) } diff --git a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs index a64d97637c3bf..d44811192f667 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs @@ -92,7 +92,7 @@ impl AggregateExpr for OrderSensitiveArrayAgg { &self.name, // This should be the same as return type of AggregateFunction::ArrayAgg Field::new("item", self.input_data_type.clone(), self.nullable), - false, + true, )) } @@ -111,7 +111,7 @@ impl AggregateExpr for OrderSensitiveArrayAgg { let mut fields = vec![Field::new_list( format_state_name(&self.name, "array_agg"), Field::new("item", self.input_data_type.clone(), self.nullable), - false, // This should be the same as field() + true, // This should be the same as field() )]; let orderings = ordering_fields(&self.ordering_req, &self.order_by_data_types); fields.push(Field::new_list( @@ -309,6 +309,14 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { } fn evaluate(&mut self) -> Result { + if self.values.is_empty() { + return Ok(ScalarValue::new_null_list( + self.datatypes[0].clone(), + self.nullable, + 1, + )); + } + let values = self.values.clone(); let array = if self.reverse { ScalarValue::new_list_from_iter( diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index d4cd3d51d1744..68c9b4859f1f8 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -147,7 +147,7 @@ mod tests { Field::new_list( "c1", Field::new("item", data_type.clone(), true), - false, + true, ), result_agg_phy_exprs.field().unwrap() ); @@ -167,7 +167,7 @@ mod tests { Field::new_list( "c1", Field::new("item", data_type.clone(), true), - false, + true, ), result_agg_phy_exprs.field().unwrap() ); diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index e891093c81560..7dd1ea82b3275 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1694,7 +1694,7 @@ SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT query ? SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 LIMIT 0) test ---- -[] +NULL # csv_query_array_agg_one query ? 
@@ -1753,31 +1753,12 @@ NULL 4 29 1.260869565217 123 -117 23 NULL 5 -194 -13.857142857143 118 -101 14 NULL NULL 781 7.81 125 -117 100 -# TODO: array_agg_distinct output is non-deterministic -- rewrite with array_sort(list_sort) -# unnest is also not available, so manually unnesting via CROSS JOIN -# additional count(1) forces array_agg_distinct instead of array_agg over aggregated by c2 data -# +# select with count to forces array_agg_distinct function, since single distinct expression is converted to group by by optimizer # csv_query_array_agg_distinct -query III -WITH indices AS ( - SELECT 1 AS idx UNION ALL - SELECT 2 AS idx UNION ALL - SELECT 3 AS idx UNION ALL - SELECT 4 AS idx UNION ALL - SELECT 5 AS idx -) -SELECT data.arr[indices.idx] as element, array_length(data.arr) as array_len, dummy -FROM ( - SELECT array_agg(distinct c2) as arr, count(1) as dummy FROM aggregate_test_100 -) data - CROSS JOIN indices -ORDER BY 1 ----- -1 5 100 -2 5 100 -3 5 100 -4 5 100 -5 5 100 +query ?I +SELECT array_sort(array_agg(distinct c2)), count(1) FROM aggregate_test_100 +---- +[1, 2, 3, 4, 5] 100 # aggregate_time_min_and_max query TT @@ -2732,6 +2713,16 @@ SELECT COUNT(DISTINCT c1) FROM test # TODO: aggregate_with_alias +# test_approx_percentile_cont_decimal_support +query TI +SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 +---- +a 4 +b 5 +c 4 +d 4 +e 4 + # array_agg_zero query ? SELECT ARRAY_AGG([]) @@ -2744,28 +2735,114 @@ SELECT ARRAY_AGG([1]) ---- [[1]] -# test_approx_percentile_cont_decimal_support -query TI -SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 +# test array_agg with no row qualified +statement ok +create table t(a int, b float, c bigint) as values (1, 1.2, 2); + +# returns NULL, follows DuckDB's behaviour +query ? +select array_agg(a) from t where a > 2; ---- -a 4 -b 5 -c 4 -d 4 -e 4 +NULL +query ? +select array_agg(b) from t where b > 3.1; +---- +NULL -# array_agg_zero query ? -SELECT ARRAY_AGG([]); +select array_agg(c) from t where c > 3; ---- -[[]] +NULL -# array_agg_one +query ?I +select array_agg(c), count(1) from t where c > 3; +---- +NULL 0 + +# returns 0 rows if group by is applied, follows DuckDB's behaviour query ? -SELECT ARRAY_AGG([1]); +select array_agg(a) from t where a > 3 group by a; ---- -[[1]] + +query ?I +select array_agg(a), count(1) from t where a > 3 group by a; +---- + +# returns NULL, follows DuckDB's behaviour +query ? +select array_agg(distinct a) from t where a > 3; +---- +NULL + +query ?I +select array_agg(distinct a), count(1) from t where a > 3; +---- +NULL 0 + +# returns 0 rows if group by is applied, follows DuckDB's behaviour +query ? +select array_agg(distinct a) from t where a > 3 group by a; +---- + +query ?I +select array_agg(distinct a), count(1) from t where a > 3 group by a; +---- + +# test order sensitive array agg +query ? +select array_agg(a order by a) from t where a > 3; +---- +NULL + +query ? +select array_agg(a order by a) from t where a > 3 group by a; +---- + +query ?I +select array_agg(a order by a), count(1) from t where a > 3 group by a; +---- + +statement ok +drop table t; + +# test with no values +statement ok +create table t(a int, b float, c bigint); + +query ? +select array_agg(a) from t; +---- +NULL + +query ? +select array_agg(b) from t; +---- +NULL + +query ? 
+select array_agg(c) from t; +---- +NULL + +query ?I +select array_agg(distinct a), count(1) from t; +---- +NULL 0 + +query ?I +select array_agg(distinct b), count(1) from t; +---- +NULL 0 + +query ?I +select array_agg(distinct b), count(1) from t; +---- +NULL 0 + +statement ok +drop table t; + # array_agg_i32 statement ok From 7a23ea9bce32dc8ae195caa8ca052673031c06c9 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Thu, 11 Jul 2024 09:38:15 +0800 Subject: [PATCH 020/357] Minor: return "not supported" for `COUNT DISTINCT` with multiple arguments (#11391) * Minor: return "not supported" for COUNT DISTINCT with multiple arguments * update condition --- datafusion/functions-aggregate/src/count.rs | 6 +++++- datafusion/sqllogictest/test_files/aggregate.slt | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index bd0155df0271b..0a667d35dce5e 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -37,7 +37,7 @@ use arrow::{ buffer::BooleanBuffer, }; use datafusion_common::{ - downcast_value, internal_err, DataFusionError, Result, ScalarValue, + downcast_value, internal_err, not_impl_err, DataFusionError, Result, ScalarValue, }; use datafusion_expr::function::StateFieldsArgs; use datafusion_expr::{ @@ -138,6 +138,10 @@ impl AggregateUDFImpl for Count { return Ok(Box::new(CountAccumulator::new())); } + if acc_args.input_exprs.len() > 1 { + return not_impl_err!("COUNT DISTINCT with multiple arguments"); + } + let data_type = acc_args.input_type; Ok(match data_type { // try and use a specialized accumulator if possible, otherwise fall back to generic accumulator diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 7dd1ea82b3275..6fafc0a74110c 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -2019,6 +2019,10 @@ SELECT count(c1, c2) FROM test ---- 3 +# count(distinct) with multiple arguments +query error DataFusion error: This feature is not implemented: COUNT DISTINCT with multiple arguments +SELECT count(distinct c1, c2) FROM test + # count_null query III SELECT count(null), count(null, null), count(distinct null) FROM test From 2413155a3ed808285e31421a8b6aac23b8abdb91 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 11 Jul 2024 08:56:47 -0600 Subject: [PATCH 021/357] feat: Add `fail_on_overflow` option to `BinaryExpr` (#11400) * update tests * update tests * add rustdoc * update PartialEq impl * fix * address feedback about improving api --- datafusion/core/src/physical_planner.rs | 4 +- .../physical-expr/src/expressions/binary.rs | 126 +++++++++++++++++- 2 files changed, 121 insertions(+), 9 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6aad4d5755320..d2bc334ec3248 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2312,7 +2312,7 @@ mod tests { // verify that the plan correctly casts u8 to i64 // the cast from u8 to i64 for literal will be simplified, and get lit(int64(5)) // the cast here is implicit so has CastOptions with safe=true - let expected = "BinaryExpr { left: Column { name: \"c7\", index: 2 }, op: Lt, right: Literal { value: Int64(5) } }"; + let expected = "BinaryExpr { left: Column { name: \"c7\", index: 2 }, op: Lt, right: Literal { value: Int64(5) }, fail_on_overflow: 
false }"; assert!(format!("{exec_plan:?}").contains(expected)); Ok(()) } @@ -2551,7 +2551,7 @@ mod tests { let execution_plan = plan(&logical_plan).await?; // verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated. - let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\") } }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\") } } }"; + let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\") }, fail_on_overflow: false }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\") }, fail_on_overflow: false }, fail_on_overflow: false }"; let actual = format!("{execution_plan:?}"); assert!(actual.contains(expected), "{}", actual); diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index c153ead9639fe..c34dcdfb75988 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -53,6 +53,8 @@ pub struct BinaryExpr { left: Arc, op: Operator, right: Arc, + /// Specifies whether an error is returned on overflow or not + fail_on_overflow: bool, } impl BinaryExpr { @@ -62,7 +64,22 @@ impl BinaryExpr { op: Operator, right: Arc, ) -> Self { - Self { left, op, right } + Self { + left, + op, + right, + fail_on_overflow: false, + } + } + + /// Create new binary expression with explicit fail_on_overflow value + pub fn with_fail_on_overflow(self, fail_on_overflow: bool) -> Self { + Self { + left: self.left, + op: self.op, + right: self.right, + fail_on_overflow, + } } /// Get the left side of the binary expression @@ -273,8 +290,11 @@ impl PhysicalExpr for BinaryExpr { } match self.op { + Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add), Operator::Plus => return apply(&lhs, &rhs, add_wrapping), + Operator::Minus if self.fail_on_overflow => return apply(&lhs, &rhs, sub), Operator::Minus => return apply(&lhs, &rhs, sub_wrapping), + Operator::Multiply if self.fail_on_overflow => return apply(&lhs, &rhs, mul), Operator::Multiply => return apply(&lhs, &rhs, mul_wrapping), Operator::Divide => return apply(&lhs, &rhs, div), Operator::Modulo => return apply(&lhs, &rhs, rem), @@ -327,11 +347,10 @@ impl PhysicalExpr for BinaryExpr { self: Arc, children: Vec>, ) -> Result> { - Ok(Arc::new(BinaryExpr::new( - Arc::clone(&children[0]), - self.op, - Arc::clone(&children[1]), - ))) + Ok(Arc::new( + BinaryExpr::new(Arc::clone(&children[0]), self.op, Arc::clone(&children[1])) + .with_fail_on_overflow(self.fail_on_overflow), + )) } fn evaluate_bounds(&self, children: &[&Interval]) -> Result { @@ -496,7 +515,12 @@ impl PartialEq for BinaryExpr { fn eq(&self, other: &dyn Any) -> bool { down_cast_any_ref(other) .downcast_ref::() - .map(|x| self.left.eq(&x.left) && self.op == x.op && self.right.eq(&x.right)) + .map(|x| { + self.left.eq(&x.left) + && self.op == x.op + && self.right.eq(&x.right) + && self.fail_on_overflow.eq(&x.fail_on_overflow) + }) .unwrap_or(false) } } @@ -661,6 +685,7 @@ mod tests { use datafusion_common::plan_datafusion_err; use datafusion_expr::type_coercion::binary::get_input_types; + use datafusion_physical_expr_common::expressions::column::Column; /// Performs a binary operation, applying any type coercion necessary fn binary_op( @@ 
-4008,4 +4033,91 @@ mod tests { .unwrap(); assert_eq!(&casted, &dictionary); } + + #[test] + fn test_add_with_overflow() -> Result<()> { + // create test data + let l = Arc::new(Int32Array::from(vec![1, i32::MAX])); + let r = Arc::new(Int32Array::from(vec![2, 1])); + let schema = Arc::new(Schema::new(vec![ + Field::new("l", DataType::Int32, false), + Field::new("r", DataType::Int32, false), + ])); + let batch = RecordBatch::try_new(schema, vec![l, r])?; + + // create expression + let expr = BinaryExpr::new( + Arc::new(Column::new("l", 0)), + Operator::Plus, + Arc::new(Column::new("r", 1)), + ) + .with_fail_on_overflow(true); + + // evaluate expression + let result = expr.evaluate(&batch); + assert!(result + .err() + .unwrap() + .to_string() + .contains("Overflow happened on: 2147483647 + 1")); + Ok(()) + } + + #[test] + fn test_subtract_with_overflow() -> Result<()> { + // create test data + let l = Arc::new(Int32Array::from(vec![1, i32::MIN])); + let r = Arc::new(Int32Array::from(vec![2, 1])); + let schema = Arc::new(Schema::new(vec![ + Field::new("l", DataType::Int32, false), + Field::new("r", DataType::Int32, false), + ])); + let batch = RecordBatch::try_new(schema, vec![l, r])?; + + // create expression + let expr = BinaryExpr::new( + Arc::new(Column::new("l", 0)), + Operator::Minus, + Arc::new(Column::new("r", 1)), + ) + .with_fail_on_overflow(true); + + // evaluate expression + let result = expr.evaluate(&batch); + assert!(result + .err() + .unwrap() + .to_string() + .contains("Overflow happened on: -2147483648 - 1")); + Ok(()) + } + + #[test] + fn test_mul_with_overflow() -> Result<()> { + // create test data + let l = Arc::new(Int32Array::from(vec![1, i32::MAX])); + let r = Arc::new(Int32Array::from(vec![2, 2])); + let schema = Arc::new(Schema::new(vec![ + Field::new("l", DataType::Int32, false), + Field::new("r", DataType::Int32, false), + ])); + let batch = RecordBatch::try_new(schema, vec![l, r])?; + + // create expression + let expr = BinaryExpr::new( + Arc::new(Column::new("l", 0)), + Operator::Multiply, + Arc::new(Column::new("r", 1)), + ) + .with_fail_on_overflow(true); + + // evaluate expression + let result = expr.evaluate(&batch); + assert!(result + .err() + .unwrap() + .to_string() + .contains("Overflow happened on: 2147483647 * 2")); + Ok(()) + } } From ed65c11065f74d72995619450d5325234aba0b5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 11 Jul 2024 22:58:20 +0800 Subject: [PATCH 022/357] Enable clone_on_ref_ptr clippy lint on sql (#11380) --- datafusion/sql/examples/sql.rs | 2 +- datafusion/sql/src/cte.rs | 2 +- datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sql/src/lib.rs | 2 ++ datafusion/sql/src/statement.rs | 4 ++-- datafusion/sql/tests/common/mod.rs | 2 +- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index aee4cf5a38ed3..1b92a7e116b16 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -119,7 +119,7 @@ fn create_table_source(fields: Vec) -> Arc { impl ContextProvider for MyContextProvider { fn get_table_source(&self, name: TableReference) -> Result> { match self.tables.get(name.table()) { - Some(table) => Ok(table.clone()), + Some(table) => Ok(Arc::clone(table)), _ => plan_err!("Table not found: {}", name.table()), } } diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 0035dcda6ed7d..3dfe00e3c5e0b 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -144,7 +144,7 @@ 
impl<'a, S: ContextProvider> SqlToRel<'a, S> { // as the input to the recursive term let work_table_plan = LogicalPlanBuilder::scan( cte_name.to_string(), - work_table_source.clone(), + Arc::clone(&work_table_source), None, )? .build()?; diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 0546a101fcb25..859842e212be7 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -981,7 +981,7 @@ mod tests { impl ContextProvider for TestContextProvider { fn get_table_source(&self, name: TableReference) -> Result> { match self.tables.get(name.table()) { - Some(table) => Ok(table.clone()), + Some(table) => Ok(Arc::clone(table)), _ => plan_err!("Table not found: {}", name.table()), } } diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index 1040cc61c702b..eb5fec7a3c8bb 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! This module provides: //! diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 6cdb2f959cd88..1acfac79acc0b 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -870,12 +870,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.context_provider.get_table_source(table_ref.clone())?; let plan = LogicalPlanBuilder::scan(table_name, table_source, None)?.build()?; - let input_schema = plan.schema().clone(); + let input_schema = Arc::clone(plan.schema()); (plan, input_schema, Some(table_ref)) } CopyToSource::Query(query) => { let plan = self.query_to_plan(query, &mut PlannerContext::new())?; - let input_schema = plan.schema().clone(); + let input_schema = Arc::clone(plan.schema()); (plan, input_schema, None) } }; diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index f5caaefb3ea08..b8d8bd12d28bb 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -258,6 +258,6 @@ impl TableSource for EmptyTable { } fn schema(&self) -> SchemaRef { - self.table_schema.clone() + Arc::clone(&self.table_schema) } } From 0b2eb50c0f980562a6c009f541c4dbd5831b5fe1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 11 Jul 2024 10:58:53 -0400 Subject: [PATCH 023/357] Move configuration information out of example usage page (#11300) --- datafusion/core/src/lib.rs | 6 + docs/source/index.rst | 8 +- docs/source/library-user-guide/index.md | 21 ++- docs/source/user-guide/crate-configuration.md | 146 ++++++++++++++++++ docs/source/user-guide/example-usage.md | 129 ---------------- 5 files changed, 177 insertions(+), 133 deletions(-) create mode 100644 docs/source/user-guide/crate-configuration.md diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index f5805bc069825..63dbe824c2314 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -620,6 +620,12 @@ doc_comment::doctest!( user_guide_example_usage ); +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/user-guide/crate-configuration.md", + user_guide_crate_configuration +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/user-guide/configs.md", diff --git a/docs/source/index.rst b/docs/source/index.rst index d491df04f7fe7..8fbff208f5617 100644 --- a/docs/source/index.rst +++ 
b/docs/source/index.rst @@ -41,13 +41,16 @@ DataFusion offers SQL and Dataframe APIs, excellent CSV, Parquet, JSON, and Avro, extensive customization, and a great community. -To get started with examples, see the `example usage`_ section of the user guide and the `datafusion-examples`_ directory. +To get started, see -See the `developer’s guide`_ for contributing and `communication`_ for getting in touch with us. +* The `example usage`_ section of the user guide and the `datafusion-examples`_ directory. +* The `library user guide`_ for examples of using DataFusion's extension APIs +* The `developer’s guide`_ for contributing and `communication`_ for getting in touch with us. .. _example usage: user-guide/example-usage.html .. _datafusion-examples: https://github.com/apache/datafusion/tree/main/datafusion-examples .. _developer’s guide: contributor-guide/index.html#developer-s-guide +.. _library user guide: library-user-guide/index.html .. _communication: contributor-guide/communication.html .. _toc.asf-links: @@ -80,6 +83,7 @@ See the `developer’s guide`_ for contributing and `communication`_ for getting user-guide/introduction user-guide/example-usage + user-guide/crate-configuration user-guide/cli/index user-guide/dataframe user-guide/expressions diff --git a/docs/source/library-user-guide/index.md b/docs/source/library-user-guide/index.md index 47257e0c926e7..fd126a1120edf 100644 --- a/docs/source/library-user-guide/index.md +++ b/docs/source/library-user-guide/index.md @@ -19,8 +19,25 @@ # Introduction -The library user guide explains how to use the DataFusion library as a dependency in your Rust project. Please check out the user-guide for more details on how to use DataFusion's SQL and DataFrame APIs, or the contributor guide for details on how to contribute to DataFusion. +The library user guide explains how to use the DataFusion library as a +dependency in your Rust project and customize its behavior using its extension APIs. -If you haven't reviewed the [architecture section in the docs][docs], it's a useful place to get the lay of the land before starting down a specific path. +Please check out the [user guide] for getting started using +DataFusion's SQL and DataFrame APIs, or the [contributor guide] +for details on how to contribute to DataFusion. +If you haven't reviewed the [architecture section in the docs][docs], it's a +useful place to get the lay of the land before starting down a specific path. + +DataFusion is designed to be extensible at all points, including + +- [x] User Defined Functions (UDFs) +- [x] User Defined Aggregate Functions (UDAFs) +- [x] User Defined Table Source (`TableProvider`) for tables +- [x] User Defined `Optimizer` passes (plan rewrites) +- [x] User Defined `LogicalPlan` nodes +- [x] User Defined `ExecutionPlan` nodes + +[user guide]: ../user-guide/example-usage.md +[contributor guide]: ../contributor-guide/index.md [docs]: https://docs.rs/datafusion/latest/datafusion/#architecture diff --git a/docs/source/user-guide/crate-configuration.md b/docs/source/user-guide/crate-configuration.md new file mode 100644 index 0000000000000..0587d06a39191 --- /dev/null +++ b/docs/source/user-guide/crate-configuration.md @@ -0,0 +1,146 @@ + + +# Crate Configuration + +This section contains information on how to configure DataFusion in your Rust +project. See the [Configuration Settings] section for a list of options that +control DataFusion's behavior. 
+ +[configuration settings]: configs.md + +## Add latest non published DataFusion dependency + +DataFusion changes are published to `crates.io` according to the [release schedule](https://github.com/apache/datafusion/blob/main/dev/release/README.md#release-process) + +If you would like to test out DataFusion changes which are merged but not yet +published, Cargo supports adding dependency directly to GitHub branch: + +```toml +datafusion = { git = "https://github.com/apache/datafusion", branch = "main"} +``` + +Also it works on the package level + +```toml +datafusion-common = { git = "https://github.com/apache/datafusion", branch = "main", package = "datafusion-common"} +``` + +And with features + +```toml +datafusion = { git = "https://github.com/apache/datafusion", branch = "main", default-features = false, features = ["unicode_expressions"] } +``` + +More on [Cargo dependencies](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies) + +## Optimized Configuration + +For an optimized build several steps are required. First, use the below in your `Cargo.toml`. It is +worth noting that using the settings in the `[profile.release]` section will significantly increase the build time. + +```toml +[dependencies] +datafusion = { version = "22.0" } +tokio = { version = "^1.0", features = ["rt-multi-thread"] } +snmalloc-rs = "0.3" + +[profile.release] +lto = true +codegen-units = 1 +``` + +Then, in `main.rs.` update the memory allocator with the below after your imports: + +```rust ,ignore +use datafusion::prelude::*; + +#[global_allocator] +static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; + +#[tokio::main] +async fn main() -> datafusion::error::Result<()> { + Ok(()) +} +``` + +Based on the instruction set architecture you are building on you will want to configure the `target-cpu` as well, ideally +with `native` or at least `avx2`. + +```shell +RUSTFLAGS='-C target-cpu=native' cargo run --release +``` + +## Enable backtraces + +By default Datafusion returns errors as a plain message. There is option to enable more verbose details about the error, +like error backtrace. To enable a backtrace you need to add Datafusion `backtrace` feature to your `Cargo.toml` file: + +```toml +datafusion = { version = "31.0.0", features = ["backtrace"]} +``` + +Set environment [variables](https://doc.rust-lang.org/std/backtrace/index.html#environment-variables) + +```bash +RUST_BACKTRACE=1 ./target/debug/datafusion-cli +DataFusion CLI v31.0.0 +> select row_numer() over (partition by a order by a) from (select 1 a); +Error during planning: Invalid function 'row_numer'. +Did you mean 'ROW_NUMBER'? + +backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5 + 1: std::backtrace_rs::backtrace::trace_unsynchronized + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5 + 2: std::backtrace::Backtrace::create + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:332:13 + 3: std::backtrace::Backtrace::capture + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:298:9 + 4: datafusion_common::error::DataFusionError::get_back_trace + at /datafusion/datafusion/common/src/error.rs:436:30 + 5: datafusion_sql::expr::function::>::sql_function_to_expr + ............ 
+``` + +The backtraces are useful when debugging code. If there is a test in `datafusion/core/src/physical_planner.rs` + +``` +#[tokio::test] +async fn test_get_backtrace_for_failed_code() -> Result<()> { + let ctx = SessionContext::new(); + + let sql = " + select row_numer() over (partition by a order by a) from (select 1 a); + "; + + let _ = ctx.sql(sql).await?.collect().await?; + + Ok(()) +} +``` + +To obtain a backtrace: + +```bash +cargo build --features=backtrace +RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture +``` + +Note: The backtrace wrapped into systems calls, so some steps on top of the backtrace can be ignored diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 7dbd4045e75bd..813dbb1bc02ae 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -33,29 +33,6 @@ datafusion = "latest_version" tokio = { version = "1.0", features = ["rt-multi-thread"] } ``` -## Add latest non published DataFusion dependency - -DataFusion changes are published to `crates.io` according to [release schedule](https://github.com/apache/datafusion/blob/main/dev/release/README.md#release-process) -In case if it is required to test out DataFusion changes which are merged but yet to be published, Cargo supports adding dependency directly to GitHub branch - -```toml -datafusion = { git = "https://github.com/apache/datafusion", branch = "main"} -``` - -Also it works on the package level - -```toml -datafusion-common = { git = "https://github.com/apache/datafusion", branch = "main", package = "datafusion-common"} -``` - -And with features - -```toml -datafusion = { git = "https://github.com/apache/datafusion", branch = "main", default-features = false, features = ["unicode_expressions"] } -``` - -More on [Cargo dependencies](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies) - ## Run a SQL query against data stored in a CSV ```rust @@ -201,109 +178,3 @@ async fn main() -> datafusion::error::Result<()> { | 1 | 2 | +---+--------+ ``` - -## Extensibility - -DataFusion is designed to be extensible at all points. To that end, you can provide your own custom: - -- [x] User Defined Functions (UDFs) -- [x] User Defined Aggregate Functions (UDAFs) -- [x] User Defined Table Source (`TableProvider`) for tables -- [x] User Defined `Optimizer` passes (plan rewrites) -- [x] User Defined `LogicalPlan` nodes -- [x] User Defined `ExecutionPlan` nodes - -## Optimized Configuration - -For an optimized build several steps are required. First, use the below in your `Cargo.toml`. It is -worth noting that using the settings in the `[profile.release]` section will significantly increase the build time. 
- -```toml -[dependencies] -datafusion = { version = "22.0" } -tokio = { version = "^1.0", features = ["rt-multi-thread"] } -snmalloc-rs = "0.3" - -[profile.release] -lto = true -codegen-units = 1 -``` - -Then, in `main.rs.` update the memory allocator with the below after your imports: - -```rust ,ignore -use datafusion::prelude::*; - -#[global_allocator] -static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; - -#[tokio::main] -async fn main() -> datafusion::error::Result<()> { - Ok(()) -} -``` - -Based on the instruction set architecture you are building on you will want to configure the `target-cpu` as well, ideally -with `native` or at least `avx2`. - -```shell -RUSTFLAGS='-C target-cpu=native' cargo run --release -``` - -## Enable backtraces - -By default Datafusion returns errors as a plain message. There is option to enable more verbose details about the error, -like error backtrace. To enable a backtrace you need to add Datafusion `backtrace` feature to your `Cargo.toml` file: - -```toml -datafusion = { version = "31.0.0", features = ["backtrace"]} -``` - -Set environment [variables](https://doc.rust-lang.org/std/backtrace/index.html#environment-variables) - -```bash -RUST_BACKTRACE=1 ./target/debug/datafusion-cli -DataFusion CLI v31.0.0 -> select row_number() over (partition by a order by a) from (select 1 a); -Error during planning: Invalid function 'row_number'. -Did you mean 'ROW_NUMBER'? - -backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace - at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5 - 1: std::backtrace_rs::backtrace::trace_unsynchronized - at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5 - 2: std::backtrace::Backtrace::create - at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:332:13 - 3: std::backtrace::Backtrace::capture - at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:298:9 - 4: datafusion_common::error::DataFusionError::get_back_trace - at /datafusion/datafusion/common/src/error.rs:436:30 - 5: datafusion_sql::expr::function::>::sql_function_to_expr - ............ -``` - -The backtraces are useful when debugging code. 
If there is a test in `datafusion/core/src/physical_planner.rs` - -``` -#[tokio::test] -async fn test_get_backtrace_for_failed_code() -> Result<()> { - let ctx = SessionContext::new(); - - let sql = " - select row_number() over (partition by a order by a) from (select 1 a); - "; - - let _ = ctx.sql(sql).await?.collect().await?; - - Ok(()) -} -``` - -To obtain a backtrace: - -```bash -cargo build --features=backtrace -RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture -``` - -Note: The backtrace wrapped into systems calls, so some steps on top of the backtrace can be ignored From faa1e98fc4bec6040c8de07d6c19973e572ad62d Mon Sep 17 00:00:00 2001 From: Arttu Date: Thu, 11 Jul 2024 18:07:53 +0200 Subject: [PATCH 024/357] reuse a single function to create the tpch test contexts (#11396) --- .../tests/cases/consumer_integration.rs | 207 ++++++------------ 1 file changed, 62 insertions(+), 145 deletions(-) diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 6133c239873b2..10c1319b903b5 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -32,151 +32,22 @@ mod tests { use std::io::BufReader; use substrait::proto::Plan; - async fn register_csv( - ctx: &SessionContext, - table_name: &str, - file_path: &str, - ) -> Result<()> { - ctx.register_csv(table_name, file_path, CsvReadOptions::default()) - .await - } - - async fn create_context_tpch1() -> Result { - let ctx = SessionContext::new(); - register_csv( - &ctx, - "FILENAME_PLACEHOLDER_0", - "tests/testdata/tpch/lineitem.csv", - ) - .await?; - Ok(ctx) - } - - async fn create_context_tpch2() -> Result { - let ctx = SessionContext::new(); - - let registrations = vec![ - ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/part.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/supplier.csv"), - ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/partsupp.csv"), - ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), - ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/region.csv"), - ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/partsupp.csv"), - ("FILENAME_PLACEHOLDER_6", "tests/testdata/tpch/supplier.csv"), - ("FILENAME_PLACEHOLDER_7", "tests/testdata/tpch/nation.csv"), - ("FILENAME_PLACEHOLDER_8", "tests/testdata/tpch/region.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - - Ok(ctx) - } - - async fn create_context_tpch3() -> Result { - let ctx = SessionContext::new(); - - let registrations = vec![ - ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), - ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - - Ok(ctx) - } - - async fn create_context_tpch4() -> Result { - let ctx = SessionContext::new(); - - let registrations = vec![ - ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/orders.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/lineitem.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - - Ok(ctx) - } - - async fn create_context_tpch5() -> Result { - let ctx = SessionContext::new(); - - let registrations = vec![ - 
("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), - ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), - ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/supplier.csv"), - ("NATION", "tests/testdata/tpch/nation.csv"), - ("REGION", "tests/testdata/tpch/region.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - - Ok(ctx) - } - - async fn create_context_tpch6() -> Result { - let ctx = SessionContext::new(); - - let registrations = - vec![("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/lineitem.csv")]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - - Ok(ctx) - } - // missing context for query 7,8,9 - - async fn create_context_tpch10() -> Result { + async fn create_context(files: Vec<(&str, &str)>) -> Result { let ctx = SessionContext::new(); - - let registrations = vec![ - ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), - ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), - ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; + for (table_name, file_path) in files { + ctx.register_csv(table_name, file_path, CsvReadOptions::default()) + .await?; } - - Ok(ctx) - } - - async fn create_context_tpch11() -> Result { - let ctx = SessionContext::new(); - - let registrations = vec![ - ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/partsupp.csv"), - ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/supplier.csv"), - ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/nation.csv"), - ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/partsupp.csv"), - ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/supplier.csv"), - ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/nation.csv"), - ]; - - for (table_name, file_path) in registrations { - register_csv(&ctx, table_name, file_path).await?; - } - Ok(ctx) } #[tokio::test] async fn tpch_test_1() -> Result<()> { - let ctx = create_context_tpch1().await?; + let ctx = create_context(vec![( + "FILENAME_PLACEHOLDER_0", + "tests/testdata/tpch/lineitem.csv", + )]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_1.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -200,7 +71,18 @@ mod tests { #[tokio::test] async fn tpch_test_2() -> Result<()> { - let ctx = create_context_tpch2().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/part.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/region.csv"), + ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_6", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_7", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_8", "tests/testdata/tpch/region.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_2.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -242,7 +124,12 @@ mod tests { #[tokio::test] async fn tpch_test_3() -> Result<()> { - let ctx = 
create_context_tpch3().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_3.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -267,7 +154,11 @@ mod tests { #[tokio::test] async fn tpch_test_4() -> Result<()> { - let ctx = create_context_tpch4().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_4.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -289,7 +180,15 @@ mod tests { #[tokio::test] async fn tpch_test_5() -> Result<()> { - let ctx = create_context_tpch5().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/supplier.csv"), + ("NATION", "tests/testdata/tpch/nation.csv"), + ("REGION", "tests/testdata/tpch/region.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_5.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -319,7 +218,11 @@ mod tests { #[tokio::test] async fn tpch_test_6() -> Result<()> { - let ctx = create_context_tpch6().await?; + let ctx = create_context(vec![( + "FILENAME_PLACEHOLDER_0", + "tests/testdata/tpch/lineitem.csv", + )]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_6.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -338,7 +241,13 @@ mod tests { // TODO: missing plan 7, 8, 9 #[tokio::test] async fn tpch_test_10() -> Result<()> { - let ctx = create_context_tpch10().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_10.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), @@ -365,7 +274,15 @@ mod tests { #[tokio::test] async fn tpch_test_11() -> Result<()> { - let ctx = create_context_tpch11().await?; + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/nation.csv"), + ]) + .await?; let path = "tests/testdata/tpch_substrait_plans/query_11.json"; let proto = serde_json::from_reader::<_, Plan>(BufReader::new( File::open(path).expect("file not found"), From 6692382f22f04542534bba0183cf0682fd932da1 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Thu, 11 Jul 2024 18:17:03 
+0200 Subject: [PATCH 025/357] refactor: change error type for "no statement" (#11411) Amends #11394 (sorry, I should have reviewed that). While reporting "not implemented" for "multiple statements" seems reasonable, I think the user should get a plan error (which roughly translates to "invalid argument") if they don't provide any statement. I don't see any reasonable way to support "no statement" ever, hence "not implemented" seems like a wrong promise. --- datafusion/core/src/execution/session_state.rs | 4 +--- datafusion/core/tests/sql/sql_api.rs | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 60745076c2427..dbfba9ea93521 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -554,9 +554,7 @@ impl SessionState { ); } let statement = statements.pop_front().ok_or_else(|| { - DataFusionError::NotImplemented( - "No SQL statements were provided in the query string".to_string(), - ) + plan_datafusion_err!("No SQL statements were provided in the query string") })?; Ok(statement) } diff --git a/datafusion/core/tests/sql/sql_api.rs b/datafusion/core/tests/sql/sql_api.rs index e7c40d2c8aa88..48f4a66b65dcf 100644 --- a/datafusion/core/tests/sql/sql_api.rs +++ b/datafusion/core/tests/sql/sql_api.rs @@ -124,7 +124,7 @@ async fn empty_statement_returns_error() { let plan_res = state.create_logical_plan("").await; assert_eq!( plan_res.unwrap_err().strip_backtrace(), - "This feature is not implemented: No SQL statements were provided in the query string" + "Error during planning: No SQL statements were provided in the query string" ); } From f284e3bb73e089abc0c06b3314014522411bf1da Mon Sep 17 00:00:00 2001 From: Chunchun Ye <14298407+appletreeisyellow@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:17:09 -0500 Subject: [PATCH 026/357] feat: add UDF to_local_time() (#11347) * feat: add UDF `to_local_time()` * chore: support column value in array * chore: lint * chore: fix conversion for us, ms, and s * chore: add more tests for daylight savings time * chore: add function description * refactor: update tests and add examples in description * chore: add description and example * chore: doc chore: doc chore: doc chore: doc chore: doc * chore: stop copying * chore: fix typo * chore: mention that the offset varies based on daylight savings time * refactor: parse timezone once and update examples in description * refactor: replace map..concat with flat_map * chore: add hard code timestamp value in test chore: doc chore: doc * chore: handle errors and remove panics * chore: move some test to slt * chore: clone time_value * chore: typo --------- Co-authored-by: Andrew Lamb --- datafusion/functions/src/datetime/mod.rs | 11 +- .../functions/src/datetime/to_local_time.rs | 564 ++++++++++++++++++ .../sqllogictest/test_files/timestamps.slt | 177 ++++++ 3 files changed, 751 insertions(+), 1 deletion(-) create mode 100644 datafusion/functions/src/datetime/to_local_time.rs diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 9c2f80856bf86..a7e9827d6ca69 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -32,6 +32,7 @@ pub mod make_date; pub mod now; pub mod to_char; pub mod to_date; +pub mod to_local_time; pub mod to_timestamp; pub mod to_unixtime; @@ -50,6 +51,7 @@ make_udf_function!( make_udf_function!(now::NowFunc, NOW, now); 
make_udf_function!(to_char::ToCharFunc, TO_CHAR, to_char); make_udf_function!(to_date::ToDateFunc, TO_DATE, to_date); +make_udf_function!(to_local_time::ToLocalTimeFunc, TO_LOCAL_TIME, to_local_time); make_udf_function!(to_unixtime::ToUnixtimeFunc, TO_UNIXTIME, to_unixtime); make_udf_function!(to_timestamp::ToTimestampFunc, TO_TIMESTAMP, to_timestamp); make_udf_function!( @@ -108,7 +110,13 @@ pub mod expr_fn { ),( now, "returns the current timestamp in nanoseconds, using the same value for all instances of now() in same statement", - ),( + ), + ( + to_local_time, + "converts a timezone-aware timestamp to local time (with no offset or timezone information), i.e. strips off the timezone from the timestamp", + args, + ), + ( to_unixtime, "converts a string and optional formats to a Unixtime", args, @@ -277,6 +285,7 @@ pub fn functions() -> Vec> { now(), to_char(), to_date(), + to_local_time(), to_unixtime(), to_timestamp(), to_timestamp_seconds(), diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs new file mode 100644 index 0000000000000..c84d1015bd7ee --- /dev/null +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -0,0 +1,564 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::ops::Add; +use std::sync::Arc; + +use arrow::array::timezone::Tz; +use arrow::array::{Array, ArrayRef, PrimitiveBuilder}; +use arrow::datatypes::DataType::Timestamp; +use arrow::datatypes::{ + ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, +}; +use arrow::datatypes::{ + TimeUnit, + TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}, +}; + +use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; +use datafusion_common::cast::as_primitive_array; +use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::TypeSignature::Exact; +use datafusion_expr::{ + ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, +}; + +/// A UDF function that converts a timezone-aware timestamp to local time (with no offset or +/// timezone information). In other words, this function strips off the timezone from the timestamp, +/// while keep the display value of the timestamp the same. 
+#[derive(Debug)] +pub struct ToLocalTimeFunc { + signature: Signature, +} + +impl Default for ToLocalTimeFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToLocalTimeFunc { + pub fn new() -> Self { + let base_sig = |array_type: TimeUnit| { + [ + Exact(vec![Timestamp(array_type, None)]), + Exact(vec![Timestamp(array_type, Some(TIMEZONE_WILDCARD.into()))]), + ] + }; + + let full_sig = [Nanosecond, Microsecond, Millisecond, Second] + .into_iter() + .flat_map(base_sig) + .collect::>(); + + Self { + signature: Signature::one_of(full_sig, Volatility::Immutable), + } + } + + fn to_local_time(&self, args: &[ColumnarValue]) -> Result { + if args.len() != 1 { + return exec_err!( + "to_local_time function requires 1 argument, got {}", + args.len() + ); + } + + let time_value = &args[0]; + let arg_type = time_value.data_type(); + match arg_type { + DataType::Timestamp(_, None) => { + // if no timezone specificed, just return the input + Ok(time_value.clone()) + } + // If has timezone, adjust the underlying time value. The current time value + // is stored as i64 in UTC, even though the timezone may not be in UTC. Therefore, + // we need to adjust the time value to the local time. See [`adjust_to_local_time`] + // for more details. + // + // Then remove the timezone in return type, i.e. return None + DataType::Timestamp(_, Some(timezone)) => { + let tz: Tz = timezone.parse()?; + + match time_value { + ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( + Some(ts), + Some(_), + )) => { + let adjusted_ts = + adjust_to_local_time::(*ts, tz)?; + Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( + Some(adjusted_ts), + None, + ))) + } + ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( + Some(ts), + Some(_), + )) => { + let adjusted_ts = + adjust_to_local_time::(*ts, tz)?; + Ok(ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( + Some(adjusted_ts), + None, + ))) + } + ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( + Some(ts), + Some(_), + )) => { + let adjusted_ts = + adjust_to_local_time::(*ts, tz)?; + Ok(ColumnarValue::Scalar(ScalarValue::TimestampMillisecond( + Some(adjusted_ts), + None, + ))) + } + ColumnarValue::Scalar(ScalarValue::TimestampSecond( + Some(ts), + Some(_), + )) => { + let adjusted_ts = + adjust_to_local_time::(*ts, tz)?; + Ok(ColumnarValue::Scalar(ScalarValue::TimestampSecond( + Some(adjusted_ts), + None, + ))) + } + ColumnarValue::Array(array) => { + fn transform_array( + array: &ArrayRef, + tz: Tz, + ) -> Result { + let mut builder = PrimitiveBuilder::::new(); + + let primitive_array = as_primitive_array::(array)?; + for ts_opt in primitive_array.iter() { + match ts_opt { + None => builder.append_null(), + Some(ts) => { + let adjusted_ts: i64 = + adjust_to_local_time::(ts, tz)?; + builder.append_value(adjusted_ts) + } + } + } + + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } + + match array.data_type() { + Timestamp(_, None) => { + // if no timezone specificed, just return the input + Ok(time_value.clone()) + } + Timestamp(Nanosecond, Some(_)) => { + transform_array::(array, tz) + } + Timestamp(Microsecond, Some(_)) => { + transform_array::(array, tz) + } + Timestamp(Millisecond, Some(_)) => { + transform_array::(array, tz) + } + Timestamp(Second, Some(_)) => { + transform_array::(array, tz) + } + _ => { + exec_err!("to_local_time function requires timestamp argument in array, got {:?}", array.data_type()) + } + } + } + _ => { + exec_err!( + "to_local_time function requires timestamp argument, got {:?}", + time_value.data_type() 
+ ) + } + } + } + _ => { + exec_err!( + "to_local_time function requires timestamp argument, got {:?}", + arg_type + ) + } + } + } +} + +/// This function converts a timestamp with a timezone to a timestamp without a timezone. +/// The display value of the adjusted timestamp remain the same, but the underlying timestamp +/// representation is adjusted according to the relative timezone offset to UTC. +/// +/// This function uses chrono to handle daylight saving time changes. +/// +/// For example, +/// +/// ```text +/// '2019-03-31T01:00:00Z'::timestamp at time zone 'Europe/Brussels' +/// ``` +/// +/// is displayed as follows in datafusion-cli: +/// +/// ```text +/// 2019-03-31T01:00:00+01:00 +/// ``` +/// +/// and is represented in DataFusion as: +/// +/// ```text +/// TimestampNanosecond(Some(1_553_990_400_000_000_000), Some("Europe/Brussels")) +/// ``` +/// +/// To strip off the timezone while keeping the display value the same, we need to +/// adjust the underlying timestamp with the timezone offset value using `adjust_to_local_time()` +/// +/// ```text +/// adjust_to_local_time(1_553_990_400_000_000_000, "Europe/Brussels") --> 1_553_994_000_000_000_000 +/// ``` +/// +/// The difference between `1_553_990_400_000_000_000` and `1_553_994_000_000_000_000` is +/// `3600_000_000_000` ns, which corresponds to 1 hour. This matches with the timezone +/// offset for "Europe/Brussels" for this date. +/// +/// Note that the offset varies with daylight savings time (DST), which makes this tricky! For +/// example, timezone "Europe/Brussels" has a 2-hour offset during DST and a 1-hour offset +/// when DST ends. +/// +/// Consequently, DataFusion can represent the timestamp in local time (with no offset or +/// timezone information) as +/// +/// ```text +/// TimestampNanosecond(Some(1_553_994_000_000_000_000), None) +/// ``` +/// +/// which is displayed as follows in datafusion-cli: +/// +/// ```text +/// 2019-03-31T01:00:00 +/// ``` +/// +/// See `test_adjust_to_local_time()` for example +fn adjust_to_local_time(ts: i64, tz: Tz) -> Result { + fn convert_timestamp(ts: i64, converter: F) -> Result> + where + F: Fn(i64) -> MappedLocalTime>, + { + match converter(ts) { + MappedLocalTime::Ambiguous(earliest, latest) => exec_err!( + "Ambiguous timestamp. Do you mean {:?} or {:?}", + earliest, + latest + ), + MappedLocalTime::None => exec_err!( + "The local time does not exist because there is a gap in the local time." + ), + MappedLocalTime::Single(date_time) => Ok(date_time), + } + } + + let date_time = match T::UNIT { + Nanosecond => Utc.timestamp_nanos(ts), + Microsecond => convert_timestamp(ts, |ts| Utc.timestamp_micros(ts))?, + Millisecond => convert_timestamp(ts, |ts| Utc.timestamp_millis_opt(ts))?, + Second => convert_timestamp(ts, |ts| Utc.timestamp_opt(ts, 0))?, + }; + + let offset_seconds: i64 = tz + .offset_from_utc_datetime(&date_time.naive_utc()) + .fix() + .local_minus_utc() as i64; + + let adjusted_date_time = date_time.add( + // This should not fail under normal circumstances as the + // maximum possible offset is 26 hours (93,600 seconds) + TimeDelta::try_seconds(offset_seconds) + .ok_or(DataFusionError::Internal("Offset seconds should be less than i64::MAX / 1_000 or greater than -i64::MAX / 1_000".to_string()))?, + ); + + // convert the naive datetime back to i64 + match T::UNIT { + Nanosecond => adjusted_date_time.timestamp_nanos_opt().ok_or( + DataFusionError::Internal( + "Failed to convert DateTime to timestamp in nanosecond. This error may occur if the date is out of range. 
The supported date ranges are between 1677-09-21T00:12:43.145224192 and 2262-04-11T23:47:16.854775807".to_string(), + ), + ), + Microsecond => Ok(adjusted_date_time.timestamp_micros()), + Millisecond => Ok(adjusted_date_time.timestamp_millis()), + Second => Ok(adjusted_date_time.timestamp()), + } +} + +impl ScalarUDFImpl for ToLocalTimeFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "to_local_time" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + if arg_types.len() != 1 { + return exec_err!( + "to_local_time function requires 1 argument, got {:?}", + arg_types.len() + ); + } + + match &arg_types[0] { + Timestamp(Nanosecond, _) => Ok(Timestamp(Nanosecond, None)), + Timestamp(Microsecond, _) => Ok(Timestamp(Microsecond, None)), + Timestamp(Millisecond, _) => Ok(Timestamp(Millisecond, None)), + Timestamp(Second, _) => Ok(Timestamp(Second, None)), + _ => exec_err!( + "The to_local_time function can only accept timestamp as the arg, got {:?}", arg_types[0] + ), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.len() != 1 { + return exec_err!( + "to_local_time function requires 1 argument, got {:?}", + args.len() + ); + } + + self.to_local_time(args) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::{types::TimestampNanosecondType, TimestampNanosecondArray}; + use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; + use arrow::datatypes::{DataType, TimeUnit}; + use chrono::NaiveDateTime; + use datafusion_common::ScalarValue; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use super::{adjust_to_local_time, ToLocalTimeFunc}; + + #[test] + fn test_adjust_to_local_time() { + let timestamp_str = "2020-03-31T13:40:00"; + let tz: arrow::array::timezone::Tz = + "America/New_York".parse().expect("Invalid timezone"); + + let timestamp = timestamp_str + .parse::() + .unwrap() + .and_local_timezone(tz) // this is in a local timezone + .unwrap() + .timestamp_nanos_opt() + .unwrap(); + + let expected_timestamp = timestamp_str + .parse::() + .unwrap() + .and_utc() // this is in UTC + .timestamp_nanos_opt() + .unwrap(); + + let res = adjust_to_local_time::(timestamp, tz).unwrap(); + assert_eq!(res, expected_timestamp); + } + + #[test] + fn test_to_local_time_scalar() { + let timezone = Some("Europe/Brussels".into()); + let timestamps_with_timezone = vec![ + ( + ScalarValue::TimestampNanosecond( + Some(1_123_123_000_000_000_000), + timezone.clone(), + ), + ScalarValue::TimestampNanosecond(Some(1_123_130_200_000_000_000), None), + ), + ( + ScalarValue::TimestampMicrosecond( + Some(1_123_123_000_000_000), + timezone.clone(), + ), + ScalarValue::TimestampMicrosecond(Some(1_123_130_200_000_000), None), + ), + ( + ScalarValue::TimestampMillisecond( + Some(1_123_123_000_000), + timezone.clone(), + ), + ScalarValue::TimestampMillisecond(Some(1_123_130_200_000), None), + ), + ( + ScalarValue::TimestampSecond(Some(1_123_123_000), timezone), + ScalarValue::TimestampSecond(Some(1_123_130_200), None), + ), + ]; + + for (input, expected) in timestamps_with_timezone { + test_to_local_time_helper(input, expected); + } + } + + #[test] + fn test_timezone_with_daylight_savings() { + let timezone_str = "America/New_York"; + let tz: arrow::array::timezone::Tz = + timezone_str.parse().expect("Invalid timezone"); + + // Test data: + // ( + // the string display of the input timestamp, + // the i64 representation of the timestamp before 
adjustment in nanosecond, + // the i64 representation of the timestamp after adjustment in nanosecond, + // ) + let test_cases = vec![ + ( + // DST time + "2020-03-31T13:40:00", + 1_585_676_400_000_000_000, + 1_585_662_000_000_000_000, + ), + ( + // End of DST + "2020-11-04T14:06:40", + 1_604_516_800_000_000_000, + 1_604_498_800_000_000_000, + ), + ]; + + for ( + input_timestamp_str, + expected_input_timestamp, + expected_adjusted_timestamp, + ) in test_cases + { + let input_timestamp = input_timestamp_str + .parse::() + .unwrap() + .and_local_timezone(tz) // this is in a local timezone + .unwrap() + .timestamp_nanos_opt() + .unwrap(); + assert_eq!(input_timestamp, expected_input_timestamp); + + let expected_timestamp = input_timestamp_str + .parse::() + .unwrap() + .and_utc() // this is in UTC + .timestamp_nanos_opt() + .unwrap(); + assert_eq!(expected_timestamp, expected_adjusted_timestamp); + + let input = ScalarValue::TimestampNanosecond( + Some(input_timestamp), + Some(timezone_str.into()), + ); + let expected = + ScalarValue::TimestampNanosecond(Some(expected_timestamp), None); + test_to_local_time_helper(input, expected) + } + } + + fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) { + let res = ToLocalTimeFunc::new() + .invoke(&[ColumnarValue::Scalar(input)]) + .unwrap(); + match res { + ColumnarValue::Scalar(res) => { + assert_eq!(res, expected); + } + _ => panic!("unexpected return type"), + } + } + + #[test] + fn test_to_local_time_timezones_array() { + let cases = [ + ( + vec![ + "2020-09-08T00:00:00", + "2020-09-08T01:00:00", + "2020-09-08T02:00:00", + "2020-09-08T03:00:00", + "2020-09-08T04:00:00", + ], + None::>, + vec![ + "2020-09-08T00:00:00", + "2020-09-08T01:00:00", + "2020-09-08T02:00:00", + "2020-09-08T03:00:00", + "2020-09-08T04:00:00", + ], + ), + ( + vec![ + "2020-09-08T00:00:00", + "2020-09-08T01:00:00", + "2020-09-08T02:00:00", + "2020-09-08T03:00:00", + "2020-09-08T04:00:00", + ], + Some("+01:00".into()), + vec![ + "2020-09-08T00:00:00", + "2020-09-08T01:00:00", + "2020-09-08T02:00:00", + "2020-09-08T03:00:00", + "2020-09-08T04:00:00", + ], + ), + ]; + + cases.iter().for_each(|(source, _tz_opt, expected)| { + let input = source + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::(); + let right = expected + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::(); + let result = ToLocalTimeFunc::new() + .invoke(&[ColumnarValue::Array(Arc::new(input))]) + .unwrap(); + if let ColumnarValue::Array(result) = result { + assert_eq!( + result.data_type(), + &DataType::Timestamp(TimeUnit::Nanosecond, None) + ); + let left = arrow::array::cast::as_primitive_array::< + TimestampNanosecondType, + >(&result); + assert_eq!(left, &right); + } else { + panic!("unexpected column type"); + } + }); + } +} diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 2216dbfa5fd58..f4e492649b9f8 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -2844,3 +2844,180 @@ select arrow_cast('2024-06-17T13:00:00', 'Timestamp(Nanosecond, Some("UTC"))') - query error select arrow_cast('2024-06-17T13:00:00', 'Timestamp(Nanosecond, Some("+00:00"))') - arrow_cast('2024-06-17T12:00:00', 'Timestamp(Microsecond, Some("+01:00"))'); + +########## +## Test to_local_time function +########## + +# invalid number of arguments -- no argument +statement error +select to_local_time(); + +# invalid number of 
arguments -- more than 1 argument +statement error +select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string'); + +# invalid argument data type +statement error DataFusion error: Execution error: The to_local_time function can only accept timestamp as the arg, got Utf8 +select to_local_time('2024-04-01T00:00:20Z'); + +# invalid timezone +statement error DataFusion error: Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone +select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/timezone'); + +# valid query +query P +select to_local_time('2024-04-01T00:00:20Z'::timestamp); +---- +2024-04-01T00:00:20 + +query P +select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE '+05:00'); +---- +2024-04-01T00:00:20 + +query P +select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); +---- +2024-04-01T00:00:20 + +query PTPT +select + time, + arrow_typeof(time) as type, + to_local_time(time) as to_local_time, + arrow_typeof(to_local_time(time)) as to_local_time_type +from ( + select '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' as time +); +---- +2024-04-01T00:00:20+02:00 Timestamp(Nanosecond, Some("Europe/Brussels")) 2024-04-01T00:00:20 Timestamp(Nanosecond, None) + +# use to_local_time() in date_bin() +query P +select date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')); +---- +2024-04-01T00:00:00 + +query P +select date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels'; +---- +2024-04-01T00:00:00+02:00 + +# test using to_local_time() on array values +statement ok +create table t AS +VALUES + ('2024-01-01T00:00:01Z'), + ('2024-02-01T00:00:01Z'), + ('2024-03-01T00:00:01Z'), + ('2024-04-01T00:00:01Z'), + ('2024-05-01T00:00:01Z'), + ('2024-06-01T00:00:01Z'), + ('2024-07-01T00:00:01Z'), + ('2024-08-01T00:00:01Z'), + ('2024-09-01T00:00:01Z'), + ('2024-10-01T00:00:01Z'), + ('2024-11-01T00:00:01Z'), + ('2024-12-01T00:00:01Z') +; + +statement ok +create view t_utc as +select column1::timestamp AT TIME ZONE 'UTC' as "column1" +from t; + +statement ok +create view t_timezone as +select column1::timestamp AT TIME ZONE 'Europe/Brussels' as "column1" +from t; + +query PPT +select column1, to_local_time(column1::timestamp), arrow_typeof(to_local_time(column1::timestamp)) from t_utc; +---- +2024-01-01T00:00:01Z 2024-01-01T00:00:01 Timestamp(Nanosecond, None) +2024-02-01T00:00:01Z 2024-02-01T00:00:01 Timestamp(Nanosecond, None) +2024-03-01T00:00:01Z 2024-03-01T00:00:01 Timestamp(Nanosecond, None) +2024-04-01T00:00:01Z 2024-04-01T00:00:01 Timestamp(Nanosecond, None) +2024-05-01T00:00:01Z 2024-05-01T00:00:01 Timestamp(Nanosecond, None) +2024-06-01T00:00:01Z 2024-06-01T00:00:01 Timestamp(Nanosecond, None) +2024-07-01T00:00:01Z 2024-07-01T00:00:01 Timestamp(Nanosecond, None) +2024-08-01T00:00:01Z 2024-08-01T00:00:01 Timestamp(Nanosecond, None) +2024-09-01T00:00:01Z 2024-09-01T00:00:01 Timestamp(Nanosecond, None) +2024-10-01T00:00:01Z 2024-10-01T00:00:01 Timestamp(Nanosecond, None) +2024-11-01T00:00:01Z 2024-11-01T00:00:01 Timestamp(Nanosecond, None) +2024-12-01T00:00:01Z 2024-12-01T00:00:01 Timestamp(Nanosecond, None) + +query PPT +select column1, to_local_time(column1), arrow_typeof(to_local_time(column1)) from t_utc; +---- +2024-01-01T00:00:01Z 2024-01-01T00:00:01 Timestamp(Nanosecond, None) +2024-02-01T00:00:01Z 2024-02-01T00:00:01 Timestamp(Nanosecond, 
None) +2024-03-01T00:00:01Z 2024-03-01T00:00:01 Timestamp(Nanosecond, None) +2024-04-01T00:00:01Z 2024-04-01T00:00:01 Timestamp(Nanosecond, None) +2024-05-01T00:00:01Z 2024-05-01T00:00:01 Timestamp(Nanosecond, None) +2024-06-01T00:00:01Z 2024-06-01T00:00:01 Timestamp(Nanosecond, None) +2024-07-01T00:00:01Z 2024-07-01T00:00:01 Timestamp(Nanosecond, None) +2024-08-01T00:00:01Z 2024-08-01T00:00:01 Timestamp(Nanosecond, None) +2024-09-01T00:00:01Z 2024-09-01T00:00:01 Timestamp(Nanosecond, None) +2024-10-01T00:00:01Z 2024-10-01T00:00:01 Timestamp(Nanosecond, None) +2024-11-01T00:00:01Z 2024-11-01T00:00:01 Timestamp(Nanosecond, None) +2024-12-01T00:00:01Z 2024-12-01T00:00:01 Timestamp(Nanosecond, None) + +query PPT +select column1, to_local_time(column1), arrow_typeof(to_local_time(column1)) from t_timezone; +---- +2024-01-01T00:00:01+01:00 2024-01-01T00:00:01 Timestamp(Nanosecond, None) +2024-02-01T00:00:01+01:00 2024-02-01T00:00:01 Timestamp(Nanosecond, None) +2024-03-01T00:00:01+01:00 2024-03-01T00:00:01 Timestamp(Nanosecond, None) +2024-04-01T00:00:01+02:00 2024-04-01T00:00:01 Timestamp(Nanosecond, None) +2024-05-01T00:00:01+02:00 2024-05-01T00:00:01 Timestamp(Nanosecond, None) +2024-06-01T00:00:01+02:00 2024-06-01T00:00:01 Timestamp(Nanosecond, None) +2024-07-01T00:00:01+02:00 2024-07-01T00:00:01 Timestamp(Nanosecond, None) +2024-08-01T00:00:01+02:00 2024-08-01T00:00:01 Timestamp(Nanosecond, None) +2024-09-01T00:00:01+02:00 2024-09-01T00:00:01 Timestamp(Nanosecond, None) +2024-10-01T00:00:01+02:00 2024-10-01T00:00:01 Timestamp(Nanosecond, None) +2024-11-01T00:00:01+01:00 2024-11-01T00:00:01 Timestamp(Nanosecond, None) +2024-12-01T00:00:01+01:00 2024-12-01T00:00:01 Timestamp(Nanosecond, None) + +# combine to_local_time() with date_bin() +query P +select date_bin(interval '1 day', to_local_time(column1)) AT TIME ZONE 'Europe/Brussels' as date_bin from t_utc; +---- +2024-01-01T00:00:00+01:00 +2024-02-01T00:00:00+01:00 +2024-03-01T00:00:00+01:00 +2024-04-01T00:00:00+02:00 +2024-05-01T00:00:00+02:00 +2024-06-01T00:00:00+02:00 +2024-07-01T00:00:00+02:00 +2024-08-01T00:00:00+02:00 +2024-09-01T00:00:00+02:00 +2024-10-01T00:00:00+02:00 +2024-11-01T00:00:00+01:00 +2024-12-01T00:00:00+01:00 + +query P +select date_bin(interval '1 day', to_local_time(column1)) AT TIME ZONE 'Europe/Brussels' as date_bin from t_timezone; +---- +2024-01-01T00:00:00+01:00 +2024-02-01T00:00:00+01:00 +2024-03-01T00:00:00+01:00 +2024-04-01T00:00:00+02:00 +2024-05-01T00:00:00+02:00 +2024-06-01T00:00:00+02:00 +2024-07-01T00:00:00+02:00 +2024-08-01T00:00:00+02:00 +2024-09-01T00:00:00+02:00 +2024-10-01T00:00:00+02:00 +2024-11-01T00:00:00+01:00 +2024-12-01T00:00:00+01:00 + +statement ok +drop table t; + +statement ok +drop view t_utc; + +statement ok +drop view t_timezone; From 1e9f0e1d650f0549e6a8f7d6971b7373fae5199c Mon Sep 17 00:00:00 2001 From: Mohamed Abdeen <83442793+MohamedAbdeen21@users.noreply.github.com> Date: Thu, 11 Jul 2024 19:20:10 +0300 Subject: [PATCH 027/357] Implement prettier SQL unparsing (more human readable) (#11186) * initial prettier unparse * bug fix * handling minus and divide * cleaning references and comments * moved tests * Update precedence of BETWEEN * rerun CI * Change precedence to match PGSQLs * more pretty unparser tests * Update operator precedence to match latest PGSQL * directly prettify expr_to_sql * handle IS operator * correct IS precedence * update unparser tests * update unparser example * update more unparser examples * add with_pretty builder to unparser --- 
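The gist of this change, condensed into a runnable sketch (adapted from the `simple_expr_to_pretty_sql_demo` example added in this patch; the crate paths for `col`, `lit`, and `Unparser` are assumptions and may need adjusting to your workspace layout):

```rust
use datafusion_common::Result;
use datafusion_expr::{col, lit};
use datafusion_sql::unparser::Unparser;

fn pretty_unparse_sketch() -> Result<()> {
    // `a < 5 OR a = 8`, built with the fluent expression API
    let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));

    // The default unparser emits defensive parentheses so the generated SQL
    // parses the same way on engines with different precedence rules.
    let safe_sql = Unparser::default().expr_to_sql(&expr)?.to_string();
    println!("default: {safe_sql}");

    // `with_pretty(true)` drops parentheses that are redundant under
    // DataFusion's own precedence rules (updated in this commit to match
    // PostgreSQL), producing more human-readable output.
    let pretty_sql = Unparser::default()
        .with_pretty(true)
        .expr_to_sql(&expr)?
        .to_string();
    assert_eq!(pretty_sql, "a < 5 OR a = 8"); // expected value taken from the new example
    Ok(())
}
```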
.../examples/parse_sql_expr.rs | 9 + datafusion-examples/examples/plan_to_sql.rs | 18 +- datafusion/expr/src/operator.rs | 24 +- datafusion/sql/src/unparser/expr.rs | 230 ++++++++++++++---- datafusion/sql/src/unparser/mod.rs | 15 +- datafusion/sql/tests/cases/plan_to_sql.rs | 99 +++++++- 6 files changed, 319 insertions(+), 76 deletions(-) diff --git a/datafusion-examples/examples/parse_sql_expr.rs b/datafusion-examples/examples/parse_sql_expr.rs index a1fc5d269a043..e23e5accae397 100644 --- a/datafusion-examples/examples/parse_sql_expr.rs +++ b/datafusion-examples/examples/parse_sql_expr.rs @@ -153,5 +153,14 @@ async fn round_trip_parse_sql_expr_demo() -> Result<()> { assert_eq!(sql, round_trip_sql); + // enable pretty-unparsing. This make the output more human-readable + // but can be problematic when passed to other SQL engines due to + // difference in precedence rules between DataFusion and target engines. + let unparser = Unparser::default().with_pretty(true); + + let pretty = "int_col < 5 OR double_col = 8"; + let pretty_round_trip_sql = unparser.expr_to_sql(&parsed_expr)?.to_string(); + assert_eq!(pretty, pretty_round_trip_sql); + Ok(()) } diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index bd708fe52bc15..f719a33fb6249 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -31,9 +31,9 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser}; /// 1. [`simple_expr_to_sql_demo`]: Create a simple expression [`Exprs`] with /// fluent API and convert to sql suitable for passing to another database /// -/// 2. [`simple_expr_to_sql_demo_no_escape`] Create a simple expression -/// [`Exprs`] with fluent API and convert to sql without escaping column names -/// more suitable for displaying to humans. +/// 2. [`simple_expr_to_pretty_sql_demo`] Create a simple expression +/// [`Exprs`] with fluent API and convert to sql without extra parentheses, +/// suitable for displaying to humans /// /// 3. [`simple_expr_to_sql_demo_escape_mysql_style`]" Create a simple /// expression [`Exprs`] with fluent API and convert to sql escaping column @@ -49,6 +49,7 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser}; async fn main() -> Result<()> { // See how to evaluate expressions simple_expr_to_sql_demo()?; + simple_expr_to_pretty_sql_demo()?; simple_expr_to_sql_demo_escape_mysql_style()?; simple_plan_to_sql_demo().await?; round_trip_plan_to_sql_demo().await?; @@ -64,6 +65,17 @@ fn simple_expr_to_sql_demo() -> Result<()> { Ok(()) } +/// DataFusioon can remove parentheses when converting an expression to SQL. +/// Note that output is intended for humans, not for other SQL engines, +/// as difference in precedence rules can cause expressions to be parsed differently. 
+fn simple_expr_to_pretty_sql_demo() -> Result<()> { + let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8))); + let unparser = Unparser::default().with_pretty(true); + let sql = unparser.expr_to_sql(&expr)?.to_string(); + assert_eq!(sql, r#"a < 5 OR a = 8"#); + Ok(()) +} + /// DataFusion can convert expressions to SQL without escaping column names using /// using a custom dialect and an explicit unparser fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> { diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs index a10312e234460..9bb8c48d6c71f 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr/src/operator.rs @@ -218,29 +218,23 @@ impl Operator { } /// Get the operator precedence - /// use as a reference + /// use as a reference pub fn precedence(&self) -> u8 { match self { Operator::Or => 5, Operator::And => 10, - Operator::NotEq - | Operator::Eq - | Operator::Lt - | Operator::LtEq - | Operator::Gt - | Operator::GtEq => 20, - Operator::Plus | Operator::Minus => 30, - Operator::Multiply | Operator::Divide | Operator::Modulo => 40, + Operator::Eq | Operator::NotEq | Operator::LtEq | Operator::GtEq => 15, + Operator::Lt | Operator::Gt => 20, + Operator::LikeMatch + | Operator::NotLikeMatch + | Operator::ILikeMatch + | Operator::NotILikeMatch => 25, Operator::IsDistinctFrom | Operator::IsNotDistinctFrom | Operator::RegexMatch | Operator::RegexNotMatch | Operator::RegexIMatch | Operator::RegexNotIMatch - | Operator::LikeMatch - | Operator::ILikeMatch - | Operator::NotLikeMatch - | Operator::NotILikeMatch | Operator::BitwiseAnd | Operator::BitwiseOr | Operator::BitwiseShiftLeft @@ -248,7 +242,9 @@ impl Operator { | Operator::BitwiseXor | Operator::StringConcat | Operator::AtArrow - | Operator::ArrowAt => 0, + | Operator::ArrowAt => 30, + Operator::Plus | Operator::Minus => 40, + Operator::Multiply | Operator::Divide | Operator::Modulo => 45, } } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 198186934c84b..e0d05c400cb09 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -30,8 +30,8 @@ use arrow_array::{Date32Array, Date64Array, PrimitiveArray}; use arrow_schema::DataType; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ - self, Expr as AstExpr, Function, FunctionArg, Ident, Interval, TimezoneInfo, - UnaryOperator, + self, BinaryOperator, Expr as AstExpr, Function, FunctionArg, Ident, Interval, + TimezoneInfo, UnaryOperator, }; use datafusion_common::{ @@ -101,8 +101,21 @@ pub fn expr_to_unparsed(expr: &Expr) -> Result { unparser.expr_to_unparsed(expr) } +const LOWEST: &BinaryOperator = &BinaryOperator::Or; +// closest precedence we have to IS operator is BitwiseAnd (any other) in PG docs +// (https://www.postgresql.org/docs/7.2/sql-precedence.html) +const IS: &BinaryOperator = &BinaryOperator::BitwiseAnd; + impl Unparser<'_> { pub fn expr_to_sql(&self, expr: &Expr) -> Result { + let mut root_expr = self.expr_to_sql_inner(expr)?; + if self.pretty { + root_expr = self.remove_unnecessary_nesting(root_expr, LOWEST, LOWEST); + } + Ok(root_expr) + } + + fn expr_to_sql_inner(&self, expr: &Expr) -> Result { match expr { Expr::InList(InList { expr, @@ -111,10 +124,10 @@ impl Unparser<'_> { }) => { let list_expr = list .iter() - .map(|e| self.expr_to_sql(e)) + .map(|e| self.expr_to_sql_inner(e)) .collect::>>()?; Ok(ast::Expr::InList { - expr: Box::new(self.expr_to_sql(expr)?), + expr: Box::new(self.expr_to_sql_inner(expr)?), list: list_expr, 
negated: *negated, }) @@ -128,7 +141,7 @@ impl Unparser<'_> { if matches!(e, Expr::Wildcard { qualifier: None }) { Ok(FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { - self.expr_to_sql(e).map(|e| { + self.expr_to_sql_inner(e).map(|e| { FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(e)) }) } @@ -157,9 +170,9 @@ impl Unparser<'_> { low, high, }) => { - let sql_parser_expr = self.expr_to_sql(expr)?; - let sql_low = self.expr_to_sql(low)?; - let sql_high = self.expr_to_sql(high)?; + let sql_parser_expr = self.expr_to_sql_inner(expr)?; + let sql_low = self.expr_to_sql_inner(low)?; + let sql_high = self.expr_to_sql_inner(high)?; Ok(ast::Expr::Nested(Box::new(self.between_op_to_sql( sql_parser_expr, *negated, @@ -169,8 +182,8 @@ impl Unparser<'_> { } Expr::Column(col) => self.col_to_sql(col), Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let l = self.expr_to_sql(left.as_ref())?; - let r = self.expr_to_sql(right.as_ref())?; + let l = self.expr_to_sql_inner(left.as_ref())?; + let r = self.expr_to_sql_inner(right.as_ref())?; let op = self.op_to_sql(op)?; Ok(ast::Expr::Nested(Box::new(self.binary_op_to_sql(l, r, op)))) @@ -182,21 +195,21 @@ impl Unparser<'_> { }) => { let conditions = when_then_expr .iter() - .map(|(w, _)| self.expr_to_sql(w)) + .map(|(w, _)| self.expr_to_sql_inner(w)) .collect::>>()?; let results = when_then_expr .iter() - .map(|(_, t)| self.expr_to_sql(t)) + .map(|(_, t)| self.expr_to_sql_inner(t)) .collect::>>()?; let operand = match expr.as_ref() { - Some(e) => match self.expr_to_sql(e) { + Some(e) => match self.expr_to_sql_inner(e) { Ok(sql_expr) => Some(Box::new(sql_expr)), Err(_) => None, }, None => None, }; let else_result = match else_expr.as_ref() { - Some(e) => match self.expr_to_sql(e) { + Some(e) => match self.expr_to_sql_inner(e) { Ok(sql_expr) => Some(Box::new(sql_expr)), Err(_) => None, }, @@ -211,7 +224,7 @@ impl Unparser<'_> { }) } Expr::Cast(Cast { expr, data_type }) => { - let inner_expr = self.expr_to_sql(expr)?; + let inner_expr = self.expr_to_sql_inner(expr)?; Ok(ast::Expr::Cast { kind: ast::CastKind::Cast, expr: Box::new(inner_expr), @@ -220,7 +233,7 @@ impl Unparser<'_> { }) } Expr::Literal(value) => Ok(self.scalar_to_sql(value)?), - Expr::Alias(Alias { expr, name: _, .. }) => self.expr_to_sql(expr), + Expr::Alias(Alias { expr, name: _, .. 
}) => self.expr_to_sql_inner(expr), Expr::WindowFunction(WindowFunction { fun, args, @@ -255,7 +268,7 @@ impl Unparser<'_> { window_name: None, partition_by: partition_by .iter() - .map(|e| self.expr_to_sql(e)) + .map(|e| self.expr_to_sql_inner(e)) .collect::>>()?, order_by, window_frame: Some(ast::WindowFrame { @@ -296,8 +309,8 @@ impl Unparser<'_> { case_insensitive: _, }) => Ok(ast::Expr::Like { negated: *negated, - expr: Box::new(self.expr_to_sql(expr)?), - pattern: Box::new(self.expr_to_sql(pattern)?), + expr: Box::new(self.expr_to_sql_inner(expr)?), + pattern: Box::new(self.expr_to_sql_inner(pattern)?), escape_char: escape_char.map(|c| c.to_string()), }), Expr::AggregateFunction(agg) => { @@ -305,7 +318,7 @@ impl Unparser<'_> { let args = self.function_args_to_sql(&agg.args)?; let filter = match &agg.filter { - Some(filter) => Some(Box::new(self.expr_to_sql(filter)?)), + Some(filter) => Some(Box::new(self.expr_to_sql_inner(filter)?)), None => None, }; Ok(ast::Expr::Function(Function { @@ -339,7 +352,7 @@ impl Unparser<'_> { Ok(ast::Expr::Subquery(sub_query)) } Expr::InSubquery(insubq) => { - let inexpr = Box::new(self.expr_to_sql(insubq.expr.as_ref())?); + let inexpr = Box::new(self.expr_to_sql_inner(insubq.expr.as_ref())?); let sub_statement = self.plan_to_sql(insubq.subquery.subquery.as_ref())?; let sub_query = if let ast::Statement::Query(inner_query) = sub_statement @@ -377,38 +390,38 @@ impl Unparser<'_> { nulls_first: _, }) => plan_err!("Sort expression should be handled by expr_to_unparsed"), Expr::IsNull(expr) => { - Ok(ast::Expr::IsNull(Box::new(self.expr_to_sql(expr)?))) - } - Expr::IsNotNull(expr) => { - Ok(ast::Expr::IsNotNull(Box::new(self.expr_to_sql(expr)?))) + Ok(ast::Expr::IsNull(Box::new(self.expr_to_sql_inner(expr)?))) } + Expr::IsNotNull(expr) => Ok(ast::Expr::IsNotNull(Box::new( + self.expr_to_sql_inner(expr)?, + ))), Expr::IsTrue(expr) => { - Ok(ast::Expr::IsTrue(Box::new(self.expr_to_sql(expr)?))) - } - Expr::IsNotTrue(expr) => { - Ok(ast::Expr::IsNotTrue(Box::new(self.expr_to_sql(expr)?))) + Ok(ast::Expr::IsTrue(Box::new(self.expr_to_sql_inner(expr)?))) } + Expr::IsNotTrue(expr) => Ok(ast::Expr::IsNotTrue(Box::new( + self.expr_to_sql_inner(expr)?, + ))), Expr::IsFalse(expr) => { - Ok(ast::Expr::IsFalse(Box::new(self.expr_to_sql(expr)?))) - } - Expr::IsNotFalse(expr) => { - Ok(ast::Expr::IsNotFalse(Box::new(self.expr_to_sql(expr)?))) - } - Expr::IsUnknown(expr) => { - Ok(ast::Expr::IsUnknown(Box::new(self.expr_to_sql(expr)?))) - } - Expr::IsNotUnknown(expr) => { - Ok(ast::Expr::IsNotUnknown(Box::new(self.expr_to_sql(expr)?))) - } + Ok(ast::Expr::IsFalse(Box::new(self.expr_to_sql_inner(expr)?))) + } + Expr::IsNotFalse(expr) => Ok(ast::Expr::IsNotFalse(Box::new( + self.expr_to_sql_inner(expr)?, + ))), + Expr::IsUnknown(expr) => Ok(ast::Expr::IsUnknown(Box::new( + self.expr_to_sql_inner(expr)?, + ))), + Expr::IsNotUnknown(expr) => Ok(ast::Expr::IsNotUnknown(Box::new( + self.expr_to_sql_inner(expr)?, + ))), Expr::Not(expr) => { - let sql_parser_expr = self.expr_to_sql(expr)?; + let sql_parser_expr = self.expr_to_sql_inner(expr)?; Ok(AstExpr::UnaryOp { op: UnaryOperator::Not, expr: Box::new(sql_parser_expr), }) } Expr::Negative(expr) => { - let sql_parser_expr = self.expr_to_sql(expr)?; + let sql_parser_expr = self.expr_to_sql_inner(expr)?; Ok(AstExpr::UnaryOp { op: UnaryOperator::Minus, expr: Box::new(sql_parser_expr), @@ -432,7 +445,7 @@ impl Unparser<'_> { }) } Expr::TryCast(TryCast { expr, data_type }) => { - let inner_expr = self.expr_to_sql(expr)?; + let 
inner_expr = self.expr_to_sql_inner(expr)?; Ok(ast::Expr::Cast { kind: ast::CastKind::TryCast, expr: Box::new(inner_expr), @@ -449,7 +462,7 @@ impl Unparser<'_> { .iter() .map(|set| { set.iter() - .map(|e| self.expr_to_sql(e)) + .map(|e| self.expr_to_sql_inner(e)) .collect::>>() }) .collect::>>()?; @@ -460,7 +473,7 @@ impl Unparser<'_> { let expr_ast_sets = cube .iter() .map(|e| { - let sql = self.expr_to_sql(e)?; + let sql = self.expr_to_sql_inner(e)?; Ok(vec![sql]) }) .collect::>>()?; @@ -470,7 +483,7 @@ impl Unparser<'_> { let expr_ast_sets: Vec> = rollup .iter() .map(|e| { - let sql = self.expr_to_sql(e)?; + let sql = self.expr_to_sql_inner(e)?; Ok(vec![sql]) }) .collect::>>()?; @@ -603,6 +616,88 @@ impl Unparser<'_> { } } + /// Given an expression of the form `((a + b) * (c * d))`, + /// the parenthesing is redundant if the precedence of the nested expression is already higher + /// than the surrounding operators' precedence. The above expression would become + /// `(a + b) * c * d`. + /// + /// Also note that when fetching the precedence of a nested expression, we ignore other nested + /// expressions, so precedence of expr `(a * (b + c))` equals `*` and not `+`. + fn remove_unnecessary_nesting( + &self, + expr: ast::Expr, + left_op: &BinaryOperator, + right_op: &BinaryOperator, + ) -> ast::Expr { + match expr { + ast::Expr::Nested(nested) => { + let surrounding_precedence = self + .sql_op_precedence(left_op) + .max(self.sql_op_precedence(right_op)); + + let inner_precedence = self.inner_precedence(&nested); + + let not_associative = + matches!(left_op, BinaryOperator::Minus | BinaryOperator::Divide); + + if inner_precedence == surrounding_precedence && not_associative { + ast::Expr::Nested(Box::new( + self.remove_unnecessary_nesting(*nested, LOWEST, LOWEST), + )) + } else if inner_precedence >= surrounding_precedence { + self.remove_unnecessary_nesting(*nested, left_op, right_op) + } else { + ast::Expr::Nested(Box::new( + self.remove_unnecessary_nesting(*nested, LOWEST, LOWEST), + )) + } + } + ast::Expr::BinaryOp { left, op, right } => ast::Expr::BinaryOp { + left: Box::new(self.remove_unnecessary_nesting(*left, left_op, &op)), + right: Box::new(self.remove_unnecessary_nesting(*right, &op, right_op)), + op, + }, + ast::Expr::IsTrue(expr) => ast::Expr::IsTrue(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsNotTrue(expr) => ast::Expr::IsNotTrue(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsFalse(expr) => ast::Expr::IsFalse(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsNotFalse(expr) => ast::Expr::IsNotFalse(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsNull(expr) => ast::Expr::IsNull(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsNotNull(expr) => ast::Expr::IsNotNull(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsUnknown(expr) => ast::Expr::IsUnknown(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + ast::Expr::IsNotUnknown(expr) => ast::Expr::IsNotUnknown(Box::new( + self.remove_unnecessary_nesting(*expr, left_op, IS), + )), + _ => expr, + } + } + + fn inner_precedence(&self, expr: &ast::Expr) -> u8 { + match expr { + ast::Expr::Nested(_) | ast::Expr::Identifier(_) | ast::Expr::Value(_) => 100, + ast::Expr::BinaryOp { op, .. 
} => self.sql_op_precedence(op), + // closest precedence we currently have to Between is PGLikeMatch + // (https://www.postgresql.org/docs/7.2/sql-precedence.html) + ast::Expr::Between { .. } => { + self.sql_op_precedence(&ast::BinaryOperator::PGLikeMatch) + } + _ => 0, + } + } + pub(super) fn between_op_to_sql( &self, expr: ast::Expr, @@ -618,6 +713,48 @@ impl Unparser<'_> { } } + fn sql_op_precedence(&self, op: &BinaryOperator) -> u8 { + match self.sql_to_op(op) { + Ok(op) => op.precedence(), + Err(_) => 0, + } + } + + fn sql_to_op(&self, op: &BinaryOperator) -> Result { + match op { + ast::BinaryOperator::Eq => Ok(Operator::Eq), + ast::BinaryOperator::NotEq => Ok(Operator::NotEq), + ast::BinaryOperator::Lt => Ok(Operator::Lt), + ast::BinaryOperator::LtEq => Ok(Operator::LtEq), + ast::BinaryOperator::Gt => Ok(Operator::Gt), + ast::BinaryOperator::GtEq => Ok(Operator::GtEq), + ast::BinaryOperator::Plus => Ok(Operator::Plus), + ast::BinaryOperator::Minus => Ok(Operator::Minus), + ast::BinaryOperator::Multiply => Ok(Operator::Multiply), + ast::BinaryOperator::Divide => Ok(Operator::Divide), + ast::BinaryOperator::Modulo => Ok(Operator::Modulo), + ast::BinaryOperator::And => Ok(Operator::And), + ast::BinaryOperator::Or => Ok(Operator::Or), + ast::BinaryOperator::PGRegexMatch => Ok(Operator::RegexMatch), + ast::BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch), + ast::BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch), + ast::BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch), + ast::BinaryOperator::PGILikeMatch => Ok(Operator::ILikeMatch), + ast::BinaryOperator::PGNotLikeMatch => Ok(Operator::NotLikeMatch), + ast::BinaryOperator::PGLikeMatch => Ok(Operator::LikeMatch), + ast::BinaryOperator::PGNotILikeMatch => Ok(Operator::NotILikeMatch), + ast::BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd), + ast::BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr), + ast::BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor), + ast::BinaryOperator::PGBitwiseShiftRight => Ok(Operator::BitwiseShiftRight), + ast::BinaryOperator::PGBitwiseShiftLeft => Ok(Operator::BitwiseShiftLeft), + ast::BinaryOperator::StringConcat => Ok(Operator::StringConcat), + ast::BinaryOperator::AtArrow => Ok(Operator::AtArrow), + ast::BinaryOperator::ArrowAt => Ok(Operator::ArrowAt), + _ => not_impl_err!("unsupported operation: {op:?}"), + } + } + fn op_to_sql(&self, op: &Operator) -> Result { match op { Operator::Eq => Ok(ast::BinaryOperator::Eq), @@ -1538,6 +1675,7 @@ mod tests { Ok(()) } + #[test] fn custom_dialect() -> Result<()> { let dialect = CustomDialect::new(Some('\'')); diff --git a/datafusion/sql/src/unparser/mod.rs b/datafusion/sql/src/unparser/mod.rs index fbbed4972b173..e5ffbc8a212ab 100644 --- a/datafusion/sql/src/unparser/mod.rs +++ b/datafusion/sql/src/unparser/mod.rs @@ -29,11 +29,23 @@ pub mod dialect; pub struct Unparser<'a> { dialect: &'a dyn Dialect, + pretty: bool, } impl<'a> Unparser<'a> { pub fn new(dialect: &'a dyn Dialect) -> Self { - Self { dialect } + Self { + dialect, + pretty: false, + } + } + + /// Allow unparser to remove parenthesis according to the precedence rules of DataFusion. + /// This might make it invalid SQL for other SQL query engines with different precedence + /// rules, even if its valid for DataFusion. 
+ pub fn with_pretty(mut self, pretty: bool) -> Self { + self.pretty = pretty; + self } } @@ -41,6 +53,7 @@ impl<'a> Default for Unparser<'a> { fn default() -> Self { Self { dialect: &DefaultDialect {}, + pretty: false, } } } diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 374403d853f92..91295b2e8aae9 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -104,26 +104,26 @@ fn roundtrip_statement() -> Result<()> { "select id, count(*) as cnt from (select p1.id as id from person p1 inner join person p2 on p1.id=p2.id) group by id", "select id, count(*), first_name from person group by first_name, id", "select id, sum(age), first_name from person group by first_name, id", - "select id, count(*), first_name - from person + "select id, count(*), first_name + from person where id!=3 and first_name=='test' - group by first_name, id + group by first_name, id having count(*)>5 and count(*)<10 order by count(*)", - r#"select id, count("First Name") as count_first_name, "Last Name" + r#"select id, count("First Name") as count_first_name, "Last Name" from person_quoted_cols where id!=3 and "First Name"=='test' - group by "Last Name", id + group by "Last Name", id having count_first_name>5 and count_first_name<10 order by count_first_name, "Last Name""#, r#"select p.id, count("First Name") as count_first_name, - "Last Name", sum(qp.id/p.id - (select sum(id) from person_quoted_cols) ) / (select count(*) from person) + "Last Name", sum(qp.id/p.id - (select sum(id) from person_quoted_cols) ) / (select count(*) from person) from (select id, "First Name", "Last Name" from person_quoted_cols) qp inner join (select * from person) p on p.id = qp.id - where p.id!=3 and "First Name"=='test' and qp.id in + where p.id!=3 and "First Name"=='test' and qp.id in (select id from (select id, count(*) from person group by id having count(*) > 0)) - group by "Last Name", p.id + group by "Last Name", p.id having count_first_name>5 and count_first_name<10 order by count_first_name, "Last Name""#, r#"SELECT j1_string as string FROM j1 @@ -134,12 +134,12 @@ fn roundtrip_statement() -> Result<()> { SELECT j2_string as string FROM j2 ORDER BY string DESC LIMIT 10"#, - "SELECT id, count(*) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - last_name, sum(id) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), + "SELECT id, count(*) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), + last_name, sum(id) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), first_name from person", - r#"SELECT id, count(distinct id) over (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), + r#"SELECT id, count(distinct id) over (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), sum(id) OVER (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) from person"#, - "SELECT id, sum(id) OVER (PARTITION BY first_name ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) from person", + "SELECT id, sum(id) OVER (PARTITION BY first_name ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) from person", ]; // For each test sql string, we transform as follows: @@ -314,3 +314,78 @@ fn test_table_references_in_plan_to_sql() { "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"", ); } + +#[test] +fn test_pretty_roundtrip() -> Result<()> { + let schema = Schema::new(vec![ + 
Field::new("id", DataType::Utf8, false), + Field::new("age", DataType::Utf8, false), + ]); + + let df_schema = DFSchema::try_from(schema)?; + + let context = MockContextProvider::default(); + let sql_to_rel = SqlToRel::new(&context); + + let unparser = Unparser::default().with_pretty(true); + + let sql_to_pretty_unparse = vec![ + ("((id < 5) OR (age = 8))", "id < 5 OR age = 8"), + ("((id + 5) * (age * 8))", "(id + 5) * age * 8"), + ("(3 + (5 * 6) * 3)", "3 + 5 * 6 * 3"), + ("((3 * (5 + 6)) * 3)", "3 * (5 + 6) * 3"), + ("((3 AND (5 OR 6)) * 3)", "(3 AND (5 OR 6)) * 3"), + ("((3 + (5 + 6)) * 3)", "(3 + 5 + 6) * 3"), + ("((3 + (5 + 6)) + 3)", "3 + 5 + 6 + 3"), + ("3 + 5 + 6 + 3", "3 + 5 + 6 + 3"), + ("3 + (5 + (6 + 3))", "3 + 5 + 6 + 3"), + ("3 + ((5 + 6) + 3)", "3 + 5 + 6 + 3"), + ("(3 + 5) + (6 + 3)", "3 + 5 + 6 + 3"), + ("((3 + 5) + (6 + 3))", "3 + 5 + 6 + 3"), + ( + "((id > 10) OR (age BETWEEN 10 AND 20))", + "id > 10 OR age BETWEEN 10 AND 20", + ), + ( + "((id > 10) * (age BETWEEN 10 AND 20))", + "(id > 10) * (age BETWEEN 10 AND 20)", + ), + ("id - (age - 8)", "id - (age - 8)"), + ("((id - age) - 8)", "id - age - 8"), + ("(id OR (age - 8))", "id OR age - 8"), + ("(id / (age - 8))", "id / (age - 8)"), + ("((id / age) * 8)", "id / age * 8"), + ("((age + 10) < 20) IS TRUE", "(age + 10 < 20) IS TRUE"), + ( + "(20 > (age + 5)) IS NOT FALSE", + "(20 > age + 5) IS NOT FALSE", + ), + ("(true AND false) IS FALSE", "(true AND false) IS FALSE"), + ("true AND (false IS FALSE)", "true AND false IS FALSE"), + ]; + + for (sql, pretty) in sql_to_pretty_unparse.iter() { + let sql_expr = Parser::new(&GenericDialect {}) + .try_with_sql(sql)? + .parse_expr()?; + let expr = + sql_to_rel.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new())?; + let round_trip_sql = unparser.expr_to_sql(&expr)?.to_string(); + assert_eq!(pretty.to_string(), round_trip_sql); + + // verify that the pretty string parses to the same underlying Expr + let pretty_sql_expr = Parser::new(&GenericDialect {}) + .try_with_sql(pretty)? 
+ .parse_expr()?; + + let pretty_expr = sql_to_rel.sql_to_expr( + pretty_sql_expr, + &df_schema, + &mut PlannerContext::new(), + )?; + + assert_eq!(expr.to_string(), pretty_expr.to_string()); + } + + Ok(()) +} From e19dd2d0b91f30b97fd68da894137987c1318b18 Mon Sep 17 00:00:00 2001 From: Chunchun Ye <14298407+appletreeisyellow@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:21:51 -0500 Subject: [PATCH 028/357] Add `to_local_time()` in function reference docs (#11401) * chore: add document for `to_local_time()` * chore: feedback Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb --- .../source/user-guide/sql/scalar_functions.md | 65 ++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d636726b45fe1..d2e012cf4093d 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1480,6 +1480,7 @@ contains(string, search_string) - [make_date](#make_date) - [to_char](#to_char) - [to_date](#to_date) +- [to_local_time](#to_local_time) - [to_timestamp](#to_timestamp) - [to_timestamp_millis](#to_timestamp_millis) - [to_timestamp_micros](#to_timestamp_micros) @@ -1710,7 +1711,7 @@ to_char(expression, format) #### Example ``` -> > select to_char('2023-03-01'::date, '%d-%m-%Y'); +> select to_char('2023-03-01'::date, '%d-%m-%Y'); +----------------------------------------------+ | to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | +----------------------------------------------+ @@ -1771,6 +1772,68 @@ to_date(expression[, ..., format_n]) Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) +### `to_local_time` + +Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or +timezone information). This function handles daylight saving time changes. + +``` +to_local_time(expression) +``` + +#### Arguments + +- **expression**: Time expression to operate on. Can be a constant, column, or function. 
+ +#### Example + +``` +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT + time, + arrow_typeof(time) as type, + to_local_time(time) as to_local_time, + arrow_typeof(to_local_time(time)) as to_local_time_type +FROM ( + SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time +); ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| time | type | to_local_time | to_local_time_type | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ + +# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather +# than UTC boundaries + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; ++---------------------+ +| date_bin | ++---------------------+ +| 2024-04-01T00:00:00 | ++---------------------+ + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; ++---------------------------+ +| date_bin_with_timezone | ++---------------------------+ +| 2024-04-01T00:00:00+02:00 | ++---------------------------+ +``` + ### `to_timestamp` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). 
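Before moving on to the next patch, note that the user-guide examples above map directly onto the Rust API. A minimal sketch (assuming a tokio runtime and the `datafusion` prelude; the query string is taken verbatim from the `date_bin` example in the docs above):

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Strip the Brussels offset while keeping the wall-clock value, then
    // bin per day on local-time boundaries instead of UTC boundaries.
    let sql = "SELECT date_bin(interval '1 day', \
               to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) \
               AS date_bin";
    ctx.sql(sql).await?.show().await?; // prints 2024-04-01T00:00:00, matching the docs above

    Ok(())
}
```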
From 4402a1a9dd8ebec1640b2fa807781a2701407672 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Thu, 11 Jul 2024 21:52:06 +0530 Subject: [PATCH 029/357] Move `overlay` planning to`ExprPlanner` (#11398) * move overlay to expr planner * typo --- datafusion/expr/src/planner.rs | 7 ++++++ datafusion/functions/src/core/planner.rs | 6 +++++ datafusion/functions/src/string/mod.rs | 1 - datafusion/sql/src/expr/mod.rs | 28 ++++++++++++------------ 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index aeb8ed8372b76..2f13923b1f10a 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -161,6 +161,13 @@ pub trait ExprPlanner: Send + Sync { ) -> Result>> { Ok(PlannerResult::Original(args)) } + + /// Plans an overlay expression eg `overlay(str PLACING substr FROM pos [FOR count])` + /// + /// Returns origin expression arguments if not possible + fn plan_overlay(&self, args: Vec) -> Result>> { + Ok(PlannerResult::Original(args)) + } } /// An operator with two arguments to plan diff --git a/datafusion/functions/src/core/planner.rs b/datafusion/functions/src/core/planner.rs index 748b598d292fe..63eaa9874c2b9 100644 --- a/datafusion/functions/src/core/planner.rs +++ b/datafusion/functions/src/core/planner.rs @@ -56,4 +56,10 @@ impl ExprPlanner for CoreFunctionPlanner { ), ))) } + + fn plan_overlay(&self, args: Vec) -> Result>> { + Ok(PlannerResult::Planned(Expr::ScalarFunction( + ScalarFunction::new_udf(crate::string::overlay(), args), + ))) + } } diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index 5bf372c29f2d5..9a19151a85e26 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -182,7 +182,6 @@ pub fn functions() -> Vec> { lower(), ltrim(), octet_length(), - overlay(), repeat(), replace(), rtrim(), diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 859842e212be7..062ef805fd9f8 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -193,7 +193,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - not_impl_err!("Extract not supported by UserDefinedExtensionPlanners: {extract_args:?}") + not_impl_err!("Extract not supported by ExprPlanner: {extract_args:?}") } SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema), @@ -292,7 +292,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - not_impl_err!("GetFieldAccess not supported by UserDefinedExtensionPlanners: {field_access_expr:?}") + not_impl_err!( + "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}" + ) } SQLExpr::CompoundIdentifier(ids) => { @@ -657,7 +659,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(args) => create_struct_args = args, } } - not_impl_err!("Struct not supported by UserDefinedExtensionPlanners: {create_struct_args:?}") + not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}") } fn sql_position_to_expr( @@ -680,9 +682,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - not_impl_err!( - "Position not supported by UserDefinedExtensionPlanners: {position_args:?}" - ) + not_impl_err!("Position not supported by ExprPlanner: {position_args:?}") } fn try_plan_dictionary_literal( @@ -914,18 +914,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let fun = self - .context_provider - .get_function_meta("overlay") - 
.ok_or_else(|| { - internal_datafusion_err!("Unable to find expected 'overlay' function") - })?; let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?; let what_arg = self.sql_expr_to_logical_expr(overlay_what, schema, planner_context)?; let from_arg = self.sql_expr_to_logical_expr(overlay_from, schema, planner_context)?; - let args = match overlay_for { + let mut overlay_args = match overlay_for { Some(for_expr) => { let for_expr = self.sql_expr_to_logical_expr(*for_expr, schema, planner_context)?; @@ -933,7 +927,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } None => vec![arg, what_arg, from_arg], }; - Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) + for planner in self.planners.iter() { + match planner.plan_overlay(overlay_args)? { + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(args) => overlay_args = args, + } + } + not_impl_err!("Overlay not supported by ExprPlanner: {overlay_args:?}") } } From d314ced8090cb599fd7808d7df41699e46ac956e Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 11 Jul 2024 18:22:20 +0200 Subject: [PATCH 030/357] Coerce types for all union children plans when eliminating nesting (#11386) --- .../optimizer/src/eliminate_nested_union.rs | 13 +++++++------ datafusion/sqllogictest/test_files/union.slt | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index c8ae937e128a6..cc8cf1f56c184 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -60,7 +60,8 @@ impl OptimizerRule for EliminateNestedUnion { let inputs = inputs .into_iter() .flat_map(extract_plans_from_union) - .collect::>(); + .map(|plan| coerce_plan_expr_for_schema(&plan, &schema)) + .collect::>>()?; Ok(Transformed::yes(LogicalPlan::Union(Union { inputs: inputs.into_iter().map(Arc::new).collect_vec(), @@ -74,7 +75,8 @@ impl OptimizerRule for EliminateNestedUnion { .into_iter() .map(extract_plan_from_distinct) .flat_map(extract_plans_from_union) - .collect::>(); + .map(|plan| coerce_plan_expr_for_schema(&plan, &schema)) + .collect::>>()?; Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( Arc::new(LogicalPlan::Union(Union { @@ -95,10 +97,9 @@ impl OptimizerRule for EliminateNestedUnion { fn extract_plans_from_union(plan: Arc) -> Vec { match unwrap_arc(plan) { - LogicalPlan::Union(Union { inputs, schema }) => inputs - .into_iter() - .map(|plan| coerce_plan_expr_for_schema(&plan, &schema).unwrap()) - .collect::>(), + LogicalPlan::Union(Union { inputs, .. 
}) => { + inputs.into_iter().map(unwrap_arc).collect::>() + } plan => vec![plan], } } diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 7b91e97e4a3e2..5ede68a42aae6 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -135,6 +135,21 @@ SELECT SUM(d) FROM ( ---- 5 +# three way union with aggregate and type coercion +query II rowsort +SELECT c1, SUM(c2) FROM ( + SELECT 1 as c1, 1::int as c2 + UNION + SELECT 2 as c1, 2::int as c2 + UNION + SELECT 3 as c1, COALESCE(3::int, 0) as c2 +) as a +GROUP BY c1 +---- +1 1 +2 2 +3 3 + # union_all_with_count statement ok CREATE table t as SELECT 1 as a From 4bed04e4e312a0b125306944aee94a93c2ff6c4f Mon Sep 17 00:00:00 2001 From: Georgi Krastev Date: Thu, 11 Jul 2024 19:26:46 +0300 Subject: [PATCH 031/357] Add customizable equality and hash functions to UDFs (#11392) * Add customizable equality and hash functions to UDFs * Improve equals and hash_value documentation * Add tests for parameterized UDFs --- .../user_defined/user_defined_aggregates.rs | 79 ++++++++++- .../user_defined_scalar_functions.rs | 128 +++++++++++++++++- datafusion/expr/src/udaf.rs | 73 ++++++++-- datafusion/expr/src/udf.rs | 62 +++++++-- datafusion/expr/src/udwf.rs | 69 ++++++++-- 5 files changed, 367 insertions(+), 44 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index d591c662d8774..96de865b6554a 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -18,14 +18,19 @@ //! This module contains end to end demonstrations of creating //! 
user defined aggregate functions -use arrow::{array::AsArray, datatypes::Fields}; -use arrow_array::{types::UInt64Type, Int32Array, PrimitiveArray, StructArray}; -use arrow_schema::Schema; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::sync::{ atomic::{AtomicBool, Ordering}, Arc, }; +use arrow::{array::AsArray, datatypes::Fields}; +use arrow_array::{ + types::UInt64Type, Int32Array, PrimitiveArray, StringArray, StructArray, +}; +use arrow_schema::Schema; + +use datafusion::dataframe::DataFrame; use datafusion::datasource::MemTable; use datafusion::test_util::plan_and_collect; use datafusion::{ @@ -45,8 +50,8 @@ use datafusion::{ }; use datafusion_common::{assert_contains, cast::as_primitive_array, exec_err}; use datafusion_expr::{ - create_udaf, function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator, - SimpleAggregateUDF, + col, create_udaf, function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator, + LogicalPlanBuilder, SimpleAggregateUDF, }; use datafusion_functions_aggregate::average::AvgAccumulator; @@ -377,6 +382,55 @@ async fn test_groups_accumulator() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_parameterized_aggregate_udf() -> Result<()> { + let batch = RecordBatch::try_from_iter([( + "text", + Arc::new(StringArray::from(vec!["foo"])) as ArrayRef, + )])?; + + let ctx = SessionContext::new(); + ctx.register_batch("t", batch)?; + let t = ctx.table("t").await?; + let signature = Signature::exact(vec![DataType::Utf8], Volatility::Immutable); + let udf1 = AggregateUDF::from(TestGroupsAccumulator { + signature: signature.clone(), + result: 1, + }); + let udf2 = AggregateUDF::from(TestGroupsAccumulator { + signature: signature.clone(), + result: 2, + }); + + let plan = LogicalPlanBuilder::from(t.into_optimized_plan()?) + .aggregate( + [col("text")], + [ + udf1.call(vec![col("text")]).alias("a"), + udf2.call(vec![col("text")]).alias("b"), + ], + )? + .build()?; + + assert_eq!( + format!("{plan:?}"), + "Aggregate: groupBy=[[t.text]], aggr=[[geo_mean(t.text) AS a, geo_mean(t.text) AS b]]\n TableScan: t projection=[text]" + ); + + let actual = DataFrame::new(ctx.state(), plan).collect().await?; + let expected = [ + "+------+---+---+", + "| text | a | b |", + "+------+---+---+", + "| foo | 1 | 2 |", + "+------+---+---+", + ]; + assert_batches_eq!(expected, &actual); + + ctx.deregister_table("t")?; + Ok(()) +} + /// Returns an context with a table "t" and the "first" and "time_sum" /// aggregate functions registered. /// @@ -735,6 +789,21 @@ impl AggregateUDFImpl for TestGroupsAccumulator { ) -> Result> { Ok(Box::new(self.clone())) } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.result == other.result && self.signature == other.signature + } else { + false + } + } + + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.signature.hash(hasher); + self.result.hash(hasher); + hasher.finish() + } } impl Accumulator for TestGroupsAccumulator { diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 1733068debb96..5847952ae6a61 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -16,11 +16,20 @@ // under the License. 
use std::any::Any; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::sync::Arc; use arrow::compute::kernels::numeric::add; -use arrow_array::{ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch}; +use arrow_array::builder::BooleanBuilder; +use arrow_array::cast::AsArray; +use arrow_array::{ + Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, +}; use arrow_schema::{DataType, Field, Schema}; +use parking_lot::Mutex; +use regex::Regex; +use sqlparser::ast::Ident; + use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState}; use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; @@ -37,8 +46,6 @@ use datafusion_expr::{ Volatility, }; use datafusion_functions_array::range::range_udf; -use parking_lot::Mutex; -use sqlparser::ast::Ident; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -1021,6 +1028,121 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( Ok(()) } +#[derive(Debug)] +struct MyRegexUdf { + signature: Signature, + regex: Regex, +} + +impl MyRegexUdf { + fn new(pattern: &str) -> Self { + Self { + signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), + regex: Regex::new(pattern).expect("regex"), + } + } + + fn matches(&self, value: Option<&str>) -> Option { + Some(self.regex.is_match(value?)) + } +} + +impl ScalarUDFImpl for MyRegexUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "regex_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, args: &[DataType]) -> Result { + if matches!(args, [DataType::Utf8]) { + Ok(DataType::Boolean) + } else { + plan_err!("regex_udf only accepts a Utf8 argument") + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args { + [ColumnarValue::Scalar(ScalarValue::Utf8(value))] => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean( + self.matches(value.as_deref()), + ))) + } + [ColumnarValue::Array(values)] => { + let mut builder = BooleanBuilder::with_capacity(values.len()); + for value in values.as_string::() { + builder.append_option(self.matches(value)) + } + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } + _ => exec_err!("regex_udf only accepts a Utf8 arguments"), + } + } + + fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.regex.as_str() == other.regex.as_str() + } else { + false + } + } + + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.regex.as_str().hash(hasher); + hasher.finish() + } +} + +#[tokio::test] +async fn test_parameterized_scalar_udf() -> Result<()> { + let batch = RecordBatch::try_from_iter([( + "text", + Arc::new(StringArray::from(vec!["foo", "bar", "foobar", "barfoo"])) as ArrayRef, + )])?; + + let ctx = SessionContext::new(); + ctx.register_batch("t", batch)?; + let t = ctx.table("t").await?; + let foo_udf = ScalarUDF::from(MyRegexUdf::new("fo{2}")); + let bar_udf = ScalarUDF::from(MyRegexUdf::new("[Bb]ar")); + + let plan = LogicalPlanBuilder::from(t.into_optimized_plan()?) + .filter( + foo_udf + .call(vec![col("text")]) + .and(bar_udf.call(vec![col("text")])), + )? + .filter(col("text").is_not_null())? 
+ .build()?; + + assert_eq!( + format!("{plan:?}"), + "Filter: t.text IS NOT NULL\n Filter: regex_udf(t.text) AND regex_udf(t.text)\n TableScan: t projection=[text]" + ); + + let actual = DataFrame::new(ctx.state(), plan).collect().await?; + let expected = [ + "+--------+", + "| text |", + "+--------+", + "| foobar |", + "| barfoo |", + "+--------+", + ]; + assert_batches_eq!(expected, &actual); + + ctx.deregister_table("t")?; + Ok(()) +} + fn create_udf_context() -> SessionContext { let ctx = SessionContext::new(); // register a custom UDF diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 7a054abea75b3..1657e034fbe2b 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -17,6 +17,17 @@ //! [`AggregateUDF`]: User Defined Aggregate Functions +use std::any::Any; +use std::fmt::{self, Debug, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::sync::Arc; +use std::vec; + +use arrow::datatypes::{DataType, Field}; +use sqlparser::ast::NullTreatment; + +use datafusion_common::{exec_err, not_impl_err, plan_err, Result}; + use crate::expr::AggregateFunction; use crate::function::{ AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, @@ -26,13 +37,6 @@ use crate::utils::format_state_name; use crate::utils::AggregateOrderSensitivity; use crate::{Accumulator, Expr}; use crate::{AccumulatorFactoryFunction, ReturnTypeFunction, Signature}; -use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, not_impl_err, plan_err, Result}; -use sqlparser::ast::NullTreatment; -use std::any::Any; -use std::fmt::{self, Debug, Formatter}; -use std::sync::Arc; -use std::vec; /// Logical representation of a user-defined [aggregate function] (UDAF). /// @@ -72,20 +76,19 @@ pub struct AggregateUDF { impl PartialEq for AggregateUDF { fn eq(&self, other: &Self) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.inner.equals(other.inner.as_ref()) } } impl Eq for AggregateUDF {} -impl std::hash::Hash for AggregateUDF { - fn hash(&self, state: &mut H) { - self.name().hash(state); - self.signature().hash(state); +impl Hash for AggregateUDF { + fn hash(&self, state: &mut H) { + self.inner.hash_value().hash(state) } } -impl std::fmt::Display for AggregateUDF { +impl fmt::Display for AggregateUDF { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}", self.name()) } @@ -280,7 +283,7 @@ where /// #[derive(Debug, Clone)] /// struct GeoMeanUdf { /// signature: Signature -/// }; +/// } /// /// impl GeoMeanUdf { /// fn new() -> Self { @@ -507,6 +510,33 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { not_impl_err!("Function {} does not implement coerce_types", self.name()) } + + /// Return true if this aggregate UDF is equal to the other. + /// + /// Allows customizing the equality of aggregate UDFs. + /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: + /// + /// - reflexive: `a.equals(a)`; + /// - symmetric: `a.equals(b)` implies `b.equals(a)`; + /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. + /// + /// By default, compares [`Self::name`] and [`Self::signature`]. + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + self.name() == other.name() && self.signature() == other.signature() + } + + /// Returns a hash value for this aggregate UDF. + /// + /// Allows customizing the hash code of aggregate UDFs. 
Similarly to [`Hash`] and [`Eq`], + /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. + /// + /// By default, hashes [`Self::name`] and [`Self::signature`]. + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.name().hash(hasher); + self.signature().hash(hasher); + hasher.finish() + } } pub enum ReversedUDAF { @@ -562,6 +592,21 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { fn aliases(&self) -> &[String] { &self.aliases } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases + } else { + false + } + } + + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.inner.hash_value().hash(hasher); + self.aliases.hash(hasher); + hasher.finish() + } } /// Implementation of [`AggregateUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 68d3af6ace3c0..1fbb3cc584b34 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -19,8 +19,13 @@ use std::any::Any; use std::fmt::{self, Debug, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::sync::Arc; +use arrow::datatypes::DataType; + +use datafusion_common::{not_impl_err, ExprSchema, Result}; + use crate::expr::create_name; use crate::interval_arithmetic::Interval; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; @@ -29,9 +34,6 @@ use crate::{ ColumnarValue, Expr, ReturnTypeFunction, ScalarFunctionImplementation, Signature, }; -use arrow::datatypes::DataType; -use datafusion_common::{not_impl_err, ExprSchema, Result}; - /// Logical representation of a Scalar User Defined Function. /// /// A scalar function produces a single row output for each row of input. This @@ -59,16 +61,15 @@ pub struct ScalarUDF { impl PartialEq for ScalarUDF { fn eq(&self, other: &Self) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.inner.equals(other.inner.as_ref()) } } impl Eq for ScalarUDF {} -impl std::hash::Hash for ScalarUDF { - fn hash(&self, state: &mut H) { - self.name().hash(state); - self.signature().hash(state); +impl Hash for ScalarUDF { + fn hash(&self, state: &mut H) { + self.inner.hash_value().hash(state) } } @@ -294,7 +295,7 @@ where /// #[derive(Debug)] /// struct AddOne { /// signature: Signature -/// }; +/// } /// /// impl AddOne { /// fn new() -> Self { @@ -540,6 +541,33 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { not_impl_err!("Function {} does not implement coerce_types", self.name()) } + + /// Return true if this scalar UDF is equal to the other. + /// + /// Allows customizing the equality of scalar UDFs. + /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: + /// + /// - reflexive: `a.equals(a)`; + /// - symmetric: `a.equals(b)` implies `b.equals(a)`; + /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. + /// + /// By default, compares [`Self::name`] and [`Self::signature`]. + fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { + self.name() == other.name() && self.signature() == other.signature() + } + + /// Returns a hash value for this scalar UDF. + /// + /// Allows customizing the hash code of scalar UDFs. Similarly to [`Hash`] and [`Eq`], + /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. 
+ /// + /// By default, hashes [`Self::name`] and [`Self::signature`]. + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.name().hash(hasher); + self.signature().hash(hasher); + hasher.finish() + } } /// ScalarUDF that adds an alias to the underlying function. It is better to @@ -557,7 +585,6 @@ impl AliasedScalarUDFImpl { ) -> Self { let mut aliases = inner.aliases().to_vec(); aliases.extend(new_aliases.into_iter().map(|s| s.to_string())); - Self { inner, aliases } } } @@ -586,6 +613,21 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { fn aliases(&self) -> &[String] { &self.aliases } + + fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases + } else { + false + } + } + + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.inner.hash_value().hash(hasher); + self.aliases.hash(hasher); + hasher.finish() + } } /// Implementation of [`ScalarUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 70b44e5e307a4..1a6b21e3dd294 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -17,18 +17,22 @@ //! [`WindowUDF`]: User Defined Window Functions -use crate::{ - function::WindowFunctionSimplification, Expr, PartitionEvaluator, - PartitionEvaluatorFactory, ReturnTypeFunction, Signature, WindowFrame, -}; -use arrow::datatypes::DataType; -use datafusion_common::Result; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::{ any::Any, fmt::{self, Debug, Display, Formatter}, sync::Arc, }; +use arrow::datatypes::DataType; + +use datafusion_common::Result; + +use crate::{ + function::WindowFunctionSimplification, Expr, PartitionEvaluator, + PartitionEvaluatorFactory, ReturnTypeFunction, Signature, WindowFrame, +}; + /// Logical representation of a user-defined window function (UDWF) /// A UDWF is different from a UDF in that it is stateful across batches. /// @@ -62,16 +66,15 @@ impl Display for WindowUDF { impl PartialEq for WindowUDF { fn eq(&self, other: &Self) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.inner.equals(other.inner.as_ref()) } } impl Eq for WindowUDF {} -impl std::hash::Hash for WindowUDF { - fn hash(&self, state: &mut H) { - self.name().hash(state); - self.signature().hash(state); +impl Hash for WindowUDF { + fn hash(&self, state: &mut H) { + self.inner.hash_value().hash(state) } } @@ -212,7 +215,7 @@ where /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature -/// }; +/// } /// /// impl SmoothIt { /// fn new() -> Self { @@ -296,6 +299,33 @@ pub trait WindowUDFImpl: Debug + Send + Sync { fn simplify(&self) -> Option { None } + + /// Return true if this window UDF is equal to the other. + /// + /// Allows customizing the equality of window UDFs. + /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: + /// + /// - reflexive: `a.equals(a)`; + /// - symmetric: `a.equals(b)` implies `b.equals(a)`; + /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. + /// + /// By default, compares [`Self::name`] and [`Self::signature`]. + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + self.name() == other.name() && self.signature() == other.signature() + } + + /// Returns a hash value for this window UDF. + /// + /// Allows customizing the hash code of window UDFs. 
Similarly to [`Hash`] and [`Eq`], + /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. + /// + /// By default, hashes [`Self::name`] and [`Self::signature`]. + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.name().hash(hasher); + self.signature().hash(hasher); + hasher.finish() + } } /// WindowUDF that adds an alias to the underlying function. It is better to @@ -342,6 +372,21 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { fn aliases(&self) -> &[String] { &self.aliases } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases + } else { + false + } + } + + fn hash_value(&self) -> u64 { + let hasher = &mut DefaultHasher::new(); + self.inner.hash_value().hash(hasher); + self.aliases.hash(hasher); + hasher.finish() + } } /// Implementation of [`WindowUDFImpl`] that wraps the function style pointers From 5ba634aa4f6d3d4ed5eefbc15dba5448f4f30923 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 12 Jul 2024 14:43:49 +0800 Subject: [PATCH 032/357] Implement ScalarFunction `MAKE_MAP` and `MAP` (#11361) * tmp * opt * modify test * add another version * implement make_map function * implement make_map function * implement map function * format and modify the doc * add benchmark for map function * add empty end-line * fix cargo check * update lock * upate lock * fix clippy * fmt and clippy * support FixedSizeList and LargeList * check type and handle null array in coerce_types * make array value throw todo error * fix clippy * simpify the error tests --- datafusion-cli/Cargo.lock | 1 + datafusion/functions/Cargo.toml | 7 +- datafusion/functions/benches/map.rs | 101 +++++++ datafusion/functions/src/core/map.rs | 312 +++++++++++++++++++++ datafusion/functions/src/core/mod.rs | 13 + datafusion/sqllogictest/test_files/map.slt | 112 ++++++++ 6 files changed, 545 insertions(+), 1 deletion(-) create mode 100644 datafusion/functions/benches/map.rs create mode 100644 datafusion/functions/src/core/map.rs diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 8af42cb43932e..7da9cc427c37d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1278,6 +1278,7 @@ name = "datafusion-functions" version = "40.0.0" dependencies = [ "arrow", + "arrow-buffer", "base64 0.22.1", "blake2", "blake3", diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 884a66724c91e..b143080b19626 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -66,6 +66,7 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } +arrow-buffer = { workspace = true } base64 = { version = "0.22", optional = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } @@ -86,7 +87,6 @@ uuid = { version = "1.7", features = ["v4"], optional = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } -arrow-buffer = { workspace = true } criterion = "0.5" rand = { workspace = true } rstest = { workspace = true } @@ -141,3 +141,8 @@ required-features = ["string_expressions"] harness = false name = "upper" required-features = ["string_expressions"] + +[[bench]] +harness = false +name = "map" +required-features = ["core_expressions"] diff --git a/datafusion/functions/benches/map.rs b/datafusion/functions/benches/map.rs new file mode 100644 index 0000000000000..cd863d0e33114 --- 
/dev/null +++ b/datafusion/functions/benches/map.rs @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::array::{Int32Array, ListArray, StringArray}; +use arrow::datatypes::{DataType, Field}; +use arrow_buffer::{OffsetBuffer, ScalarBuffer}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; +use datafusion_functions::core::{make_map, map}; +use rand::prelude::ThreadRng; +use rand::Rng; +use std::sync::Arc; + +fn keys(rng: &mut ThreadRng) -> Vec { + let mut keys = vec![]; + for _ in 0..1000 { + keys.push(rng.gen_range(0..9999).to_string()); + } + keys +} + +fn values(rng: &mut ThreadRng) -> Vec { + let mut values = vec![]; + for _ in 0..1000 { + values.push(rng.gen_range(0..9999)); + } + values +} + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("make_map_1000", |b| { + let mut rng = rand::thread_rng(); + let keys = keys(&mut rng); + let values = values(&mut rng); + let mut buffer = Vec::new(); + for i in 0..1000 { + buffer.push(ColumnarValue::Scalar(ScalarValue::Utf8(Some( + keys[i].clone(), + )))); + buffer.push(ColumnarValue::Scalar(ScalarValue::Int32(Some(values[i])))); + } + + b.iter(|| { + black_box( + make_map() + .invoke(&buffer) + .expect("map should work on valid values"), + ); + }); + }); + + c.bench_function("map_1000", |b| { + let mut rng = rand::thread_rng(); + let field = Arc::new(Field::new("item", DataType::Utf8, true)); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); + let key_list = ListArray::new( + field, + offsets, + Arc::new(StringArray::from(keys(&mut rng))), + None, + ); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); + let value_list = ListArray::new( + field, + offsets, + Arc::new(Int32Array::from(values(&mut rng))), + None, + ); + let keys = ColumnarValue::Scalar(ScalarValue::List(Arc::new(key_list))); + let values = ColumnarValue::Scalar(ScalarValue::List(Arc::new(value_list))); + + b.iter(|| { + black_box( + map() + .invoke(&[keys.clone(), values.clone()]) + .expect("map should work on valid values"), + ); + }); + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs new file mode 100644 index 0000000000000..8a8a19d7af52b --- /dev/null +++ b/datafusion/functions/src/core/map.rs @@ -0,0 +1,312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::collections::VecDeque; +use std::sync::Arc; + +use arrow::array::{Array, ArrayData, ArrayRef, MapArray, StructArray}; +use arrow::compute::concat; +use arrow::datatypes::{DataType, Field, SchemaBuilder}; +use arrow_buffer::{Buffer, ToByteSlice}; + +use datafusion_common::{exec_err, internal_err, ScalarValue}; +use datafusion_common::{not_impl_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; + +fn make_map(args: &[ColumnarValue]) -> Result { + let (key, value): (Vec<_>, Vec<_>) = args + .chunks_exact(2) + .map(|chunk| { + if let ColumnarValue::Array(_) = chunk[0] { + return not_impl_err!("make_map does not support array keys"); + } + if let ColumnarValue::Array(_) = chunk[1] { + return not_impl_err!("make_map does not support array values"); + } + Ok((chunk[0].clone(), chunk[1].clone())) + }) + .collect::>>()? + .into_iter() + .unzip(); + + let keys = ColumnarValue::values_to_arrays(&key)?; + let values = ColumnarValue::values_to_arrays(&value)?; + + let keys: Vec<_> = keys.iter().map(|k| k.as_ref()).collect(); + let values: Vec<_> = values.iter().map(|v| v.as_ref()).collect(); + + let key = match concat(&keys) { + Ok(key) => key, + Err(e) => return internal_err!("Error concatenating keys: {}", e), + }; + let value = match concat(&values) { + Ok(value) => value, + Err(e) => return internal_err!("Error concatenating values: {}", e), + }; + make_map_batch_internal(key, value) +} + +fn make_map_batch(args: &[ColumnarValue]) -> Result { + if args.len() != 2 { + return exec_err!( + "make_map requires exactly 2 arguments, got {} instead", + args.len() + ); + } + let key = get_first_array_ref(&args[0])?; + let value = get_first_array_ref(&args[1])?; + make_map_batch_internal(key, value) +} + +fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { + match columnar_value { + ColumnarValue::Scalar(value) => match value { + ScalarValue::List(array) => Ok(array.value(0).clone()), + ScalarValue::LargeList(array) => Ok(array.value(0).clone()), + ScalarValue::FixedSizeList(array) => Ok(array.value(0).clone()), + _ => exec_err!("Expected array, got {:?}", value), + }, + ColumnarValue::Array(array) => exec_err!("Expected scalar, got {:?}", array), + } +} + +fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef) -> Result { + if keys.null_count() > 0 { + return exec_err!("map key cannot be null"); + } + + if keys.len() != values.len() { + return exec_err!("map requires key and value lists to have the same length"); + } + + let key_field = Arc::new(Field::new("key", keys.data_type().clone(), false)); + let value_field = Arc::new(Field::new("value", values.data_type().clone(), true)); + let mut entry_struct_buffer: VecDeque<(Arc, ArrayRef)> = VecDeque::new(); + let mut entry_offsets_buffer = VecDeque::new(); + entry_offsets_buffer.push_back(0); + + entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(&keys))); + 
entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(&values))); + entry_offsets_buffer.push_back(keys.len() as u32); + + let entry_struct: Vec<(Arc, ArrayRef)> = entry_struct_buffer.into(); + let entry_struct = StructArray::from(entry_struct); + + let map_data_type = DataType::Map( + Arc::new(Field::new( + "entries", + entry_struct.data_type().clone(), + false, + )), + false, + ); + + let entry_offsets: Vec = entry_offsets_buffer.into(); + let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice()); + + let map_data = ArrayData::builder(map_data_type) + .len(entry_offsets.len() - 1) + .add_buffer(entry_offsets_buffer) + .add_child_data(entry_struct.to_data()) + .build()?; + + Ok(ColumnarValue::Array(Arc::new(MapArray::from(map_data)))) +} + +#[derive(Debug)] +pub struct MakeMap { + signature: Signature, +} + +impl Default for MakeMap { + fn default() -> Self { + Self::new() + } +} + +impl MakeMap { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MakeMap { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "make_map" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.is_empty() { + return exec_err!( + "make_map requires at least one pair of arguments, got 0 instead" + ); + } + if arg_types.len() % 2 != 0 { + return exec_err!( + "make_map requires an even number of arguments, got {} instead", + arg_types.len() + ); + } + + let key_type = &arg_types[0]; + let mut value_type = &arg_types[1]; + + for (i, chunk) in arg_types.chunks_exact(2).enumerate() { + if chunk[0].is_null() { + return exec_err!("make_map key cannot be null at position {}", i); + } + if &chunk[0] != key_type { + return exec_err!( + "make_map requires all keys to have the same type {}, got {} instead at position {}", + key_type, + chunk[0], + i + ); + } + + if !chunk[1].is_null() { + if value_type.is_null() { + value_type = &chunk[1]; + } else if &chunk[1] != value_type { + return exec_err!( + "map requires all values to have the same type {}, got {} instead at position {}", + value_type, + &chunk[1], + i + ); + } + } + } + + let mut result = Vec::new(); + for _ in 0..arg_types.len() / 2 { + result.push(key_type.clone()); + result.push(value_type.clone()); + } + + Ok(result) + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let key_type = &arg_types[0]; + let mut value_type = &arg_types[1]; + + for chunk in arg_types.chunks_exact(2) { + if !chunk[1].is_null() && value_type.is_null() { + value_type = &chunk[1]; + } + } + + let mut builder = SchemaBuilder::new(); + builder.push(Field::new("key", key_type.clone(), false)); + builder.push(Field::new("value", value_type.clone(), true)); + let fields = builder.finish().fields; + Ok(DataType::Map( + Arc::new(Field::new("entries", DataType::Struct(fields), false)), + false, + )) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_map(args) + } +} + +#[derive(Debug)] +pub struct MapFunc { + signature: Signature, +} + +impl Default for MapFunc { + fn default() -> Self { + Self::new() + } +} + +impl MapFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MapFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "map" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: 
&[DataType]) -> Result { + if arg_types.len() % 2 != 0 { + return exec_err!( + "map requires an even number of arguments, got {} instead", + arg_types.len() + ); + } + let mut builder = SchemaBuilder::new(); + builder.push(Field::new( + "key", + get_element_type(&arg_types[0])?.clone(), + false, + )); + builder.push(Field::new( + "value", + get_element_type(&arg_types[1])?.clone(), + true, + )); + let fields = builder.finish().fields; + Ok(DataType::Map( + Arc::new(Field::new("entries", DataType::Struct(fields), false)), + false, + )) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_map_batch(args) + } +} + +fn get_element_type(data_type: &DataType) -> Result<&DataType> { + match data_type { + DataType::List(element) => Ok(element.data_type()), + DataType::LargeList(element) => Ok(element.data_type()), + DataType::FixedSizeList(element, _) => Ok(element.data_type()), + _ => exec_err!( + "Expected list, large_list or fixed_size_list, got {:?}", + data_type + ), + } +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 062a4a104d54a..31bce04beec1b 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -25,6 +25,7 @@ pub mod arrowtypeof; pub mod coalesce; pub mod expr_ext; pub mod getfield; +pub mod map; pub mod named_struct; pub mod nullif; pub mod nvl; @@ -42,6 +43,8 @@ make_udf_function!(r#struct::StructFunc, STRUCT, r#struct); make_udf_function!(named_struct::NamedStructFunc, NAMED_STRUCT, named_struct); make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field); make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce); +make_udf_function!(map::MakeMap, MAKE_MAP, make_map); +make_udf_function!(map::MapFunc, MAP, map); pub mod expr_fn { use datafusion_expr::{Expr, Literal}; @@ -78,6 +81,14 @@ pub mod expr_fn { coalesce, "Returns `coalesce(args...)`, which evaluates to the value of the first expr which is not NULL", args, + ),( + make_map, + "Returns a map created from the given keys and values pairs. This function isn't efficient for large maps. Use the `map` function instead.", + args, + ),( + map, + "Returns a map created from a key list and a value list", + args, )); #[doc = "Returns the value of the field with the given name from the struct"] @@ -96,5 +107,7 @@ pub fn functions() -> Vec> { named_struct(), get_field(), coalesce(), + make_map(), + map(), ] } diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 417947dc6c89b..abf5b2ebbf98e 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -100,3 +100,115 @@ physical_plan statement ok drop table table_with_map; + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count; +---- +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} + +query I +SELECT MAKE_MAP('POST', 41, 'HEAD', 33)['POST']; +---- +41 + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); +---- +{POST: 41, HEAD: 33, PATCH: } + +query ? +SELECT MAKE_MAP('POST', null, 'HEAD', 33, 'PATCH', null); +---- +{POST: , HEAD: 33, PATCH: } + +query ? +SELECT MAKE_MAP(1, null, 2, 33, 3, null); +---- +{1: , 2: 33, 3: } + +query ? 
+SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']); +---- +{[1, 2]: [a, b], [3, 4]: [b]} + +query error +SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30); + +query error +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, null, 30); + +query error +SELECT MAKE_MAP('POST', 41, 123, 33,'PATCH', 30); + +query error +SELECT MAKE_MAP() + +query error +SELECT MAKE_MAP('POST', 41, 'HEAD'); + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, 30]); +---- +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +query ? +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +query error +SELECT MAP() + +query error DataFusion error: Execution error: map requires an even number of arguments, got 1 instead +SELECT MAP(['POST', 'HEAD']) + +query error DataFusion error: Execution error: Expected list, large_list or fixed_size_list, got Null +SELECT MAP(null, [41, 33, 30]); + +query error DataFusion error: Execution error: map requires key and value lists to have the same length +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33]); + +query error DataFusion error: Execution error: map key cannot be null +SELECT MAP(['POST', 'HEAD', null], [41, 33, 30]); + +query ? +SELECT MAP(make_array('POST', 'HEAD', 'PATCH'), make_array(41, 33, 30)); +---- +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array(41, 33, 30), 'FixedSizeList(3, Int64)')); +---- +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), arrow_cast(make_array(41, 33, 30), 'LargeList(Int64)')); +---- +{POST: 41, HEAD: 33, PATCH: 30} + +statement ok +create table t as values +('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2]), +('b', 2, 'k3', 30, ['k3'], [3]), +('d', 4, 'k5', 50, ['k5'], [5]); + +query error +SELECT make_map(column1, column2, column3, column4) FROM t; +# TODO: support array value +# ---- +# {a: 1, k1: 10} +# {b: 2, k3: 30} +# {d: 4, k5: 50} + +query error +SELECT map(column5, column6) FROM t; +# TODO: support array value +# ---- +# {k1:1, k2:2} +# {k3: 3} +# {k5: 5} From d542cbda8f17ba004de18bb107ecf1c8ec3266f6 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 12 Jul 2024 12:53:05 +0200 Subject: [PATCH 033/357] Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats (#11357) * Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats * remove expression tree hashing as no longer needed * address review comments * add negative tests --- datafusion/expr/src/expr.rs | 39 ++- .../optimizer/src/common_subexpr_eliminate.rs | 256 +++++++++++------- .../optimizer/src/optimize_projections/mod.rs | 10 +- datafusion/sqllogictest/test_files/cse.slt | 88 +++++- datafusion/sqllogictest/test_files/select.slt | 20 +- .../sqllogictest/test_files/tpch/q14.slt.part | 33 +-- 6 files changed, 298 insertions(+), 148 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ecece6dbfce7f..a344e621ddb12 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -17,7 +17,7 @@ //! 
Logical Expressions: [`Expr`] -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::fmt::{self, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; @@ -1380,7 +1380,7 @@ impl Expr { /// // refs contains "a" and "b" /// assert_eq!(refs.len(), 2); /// assert!(refs.contains(&Column::new_unqualified("a"))); - /// assert!(refs.contains(&Column::new_unqualified("b"))); + /// assert!(refs.contains(&Column::new_unqualified("b"))); /// ``` pub fn column_refs(&self) -> HashSet<&Column> { let mut using_columns = HashSet::new(); @@ -1401,6 +1401,41 @@ impl Expr { .expect("traversal is infallable"); } + /// Return all references to columns and their occurrence counts in the expression. + /// + /// # Example + /// ``` + /// # use std::collections::HashMap; + /// # use datafusion_common::Column; + /// # use datafusion_expr::col; + /// // For an expression `a + (b * a)` + /// let expr = col("a") + (col("b") * col("a")); + /// let mut refs = expr.column_refs_counts(); + /// // refs contains "a" and "b" + /// assert_eq!(refs.len(), 2); + /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2); + /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1); + /// ``` + pub fn column_refs_counts(&self) -> HashMap<&Column, usize> { + let mut map = HashMap::new(); + self.add_column_ref_counts(&mut map); + map + } + + /// Adds references to all columns and their occurrence counts in the expression to + /// the map. + /// + /// See [`Self::column_refs_counts`] for details + pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) { + self.apply(|expr| { + if let Expr::Column(col) = expr { + *map.entry(col).or_default() += 1; + } + Ok(TreeNodeRecursion::Continue) + }) + .expect("traversal is infallable"); + } + /// Returns true if there are any column references in this Expr pub fn any_column_refs(&self) -> bool { self.exists(|expr| Ok(matches!(expr, Expr::Column(_)))) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 721987b917d4c..e4b36652974d7 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -33,12 +33,12 @@ use datafusion_common::tree_node::{ use datafusion_common::{ internal_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef, Result, }; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{Alias, ScalarFunction}; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; -use datafusion_expr::{col, Expr, ExprSchemable}; +use datafusion_expr::{col, BinaryExpr, Case, Expr, ExprSchemable, Operator}; use indexmap::IndexMap; const CSE_PREFIX: &str = "__common_expr"; @@ -56,13 +56,9 @@ struct Identifier<'n> { } impl<'n> Identifier<'n> { - fn new(expr: &'n Expr, is_tree: bool, random_state: &RandomState) -> Self { + fn new(expr: &'n Expr, random_state: &RandomState) -> Self { let mut hasher = random_state.build_hasher(); - if is_tree { - expr.hash(&mut hasher); - } else { - expr.hash_node(&mut hasher); - } + expr.hash_node(&mut hasher); let hash = hasher.finish(); Self { hash, expr } } @@ -110,8 +106,9 @@ impl Hash for Identifier<'_> { /// ``` type IdArray<'n> = Vec<(usize, Option>)>; -/// A map that contains the number of occurrences of expressions by their identifiers. 
-type ExprStats<'n> = HashMap, usize>; +/// A map that contains the number of normal and conditional occurrences of expressions by +/// their identifiers. +type ExprStats<'n> = HashMap, (usize, usize)>; /// A map that contains the common expressions and their alias extracted during the /// second, rewriting traversal. @@ -200,6 +197,7 @@ impl CommonSubexprEliminate { expr_mask, random_state: &self.random_state, found_common: false, + conditional: false, }; expr.visit(&mut visitor)?; @@ -901,15 +899,17 @@ struct ExprIdentifierVisitor<'a, 'n> { random_state: &'a RandomState, // a flag to indicate that common expression found found_common: bool, + // if we are in a conditional branch. A conditional branch means that the expression + // might not be executed depending on the runtime values of other expressions, and + // thus can not be extracted as a common expression. + conditional: bool, } /// Record item that used when traversing an expression tree. enum VisitRecord<'n> { /// Marks the beginning of expression. It contains: /// - The post-order index assigned during the first, visiting traversal. - /// - A boolean flag if the record marks an expression subtree (not just a single - /// node). - EnterMark(usize, bool), + EnterMark(usize), /// Marks an accumulated subexpression tree. It contains: /// - The accumulated identifier of a subexpression. @@ -924,10 +924,6 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { /// Find the first `EnterMark` in the stack, and accumulates every `ExprItem` before /// it. Returns a tuple that contains: /// - The pre-order index of the expression we marked. - /// - A boolean flag if we marked an expression subtree (not just a single node). - /// If true we didn't recurse into the node's children, so we need to calculate the - /// hash of the marked expression tree (not just the node) and we need to validate - /// the expression tree (not just the node). /// - The accumulated identifier of the children of the marked expression. /// - An accumulated boolean flag from the children of the marked expression if all /// children are valid for subexpression elimination (i.e. it is safe to extract the @@ -937,14 +933,14 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { /// information up from children to parents via `visit_stack` during the first, /// visiting traversal and no need to test the expression's validity beforehand with /// an extra traversal). - fn pop_enter_mark(&mut self) -> (usize, bool, Option>, bool) { + fn pop_enter_mark(&mut self) -> (usize, Option>, bool) { let mut expr_id = None; let mut is_valid = true; while let Some(item) = self.visit_stack.pop() { match item { - VisitRecord::EnterMark(down_index, is_tree) => { - return (down_index, is_tree, expr_id, is_valid); + VisitRecord::EnterMark(down_index) => { + return (down_index, expr_id, is_valid); } VisitRecord::ExprItem(sub_expr_id, sub_expr_is_valid) => { expr_id = Some(sub_expr_id.combine(expr_id)); @@ -954,53 +950,112 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { } unreachable!("Enter mark should paired with node number"); } + + /// Save the current `conditional` status and run `f` with `conditional` set to true. 
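+    /// While `f` runs, every subexpression the visitor records is counted as
+    /// conditionally evaluated rather than surely evaluated.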
+ fn conditionally Result<()>>( + &mut self, + mut f: F, + ) -> Result<()> { + let conditional = self.conditional; + self.conditional = true; + f(self)?; + self.conditional = conditional; + + Ok(()) + } } impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_, 'n> { type Node = Expr; fn f_down(&mut self, expr: &'n Expr) -> Result { - // If an expression can short circuit its children then don't consider its - // children for CSE (https://github.com/apache/arrow-datafusion/issues/8814). - // This means that we don't recurse into its children, but handle the expression - // as a subtree when we calculate its identifier. - // TODO: consider surely executed children of "short circuited"s for CSE - let is_tree = expr.short_circuits(); - let tnr = if is_tree { - TreeNodeRecursion::Jump - } else { - TreeNodeRecursion::Continue - }; - self.id_array.push((0, None)); self.visit_stack - .push(VisitRecord::EnterMark(self.down_index, is_tree)); + .push(VisitRecord::EnterMark(self.down_index)); self.down_index += 1; - Ok(tnr) + // If an expression can short-circuit then some of its children might not be + // executed so count the occurrence of subexpressions as conditional in all + // children. + Ok(match expr { + // If we are already in a conditionally evaluated subtree then continue + // traversal. + _ if self.conditional => TreeNodeRecursion::Continue, + + // In case of `ScalarFunction`s we don't know which children are surely + // executed so start visiting all children conditionally and stop the + // recursion with `TreeNodeRecursion::Jump`. + Expr::ScalarFunction(ScalarFunction { func, args }) + if func.short_circuits() => + { + self.conditionally(|visitor| { + args.iter().try_for_each(|e| e.visit(visitor).map(|_| ())) + })?; + + TreeNodeRecursion::Jump + } + + // In case of `And` and `Or` the first child is surely executed, but we + // account subexpressions as conditional in the second. + Expr::BinaryExpr(BinaryExpr { + left, + op: Operator::And | Operator::Or, + right, + }) => { + left.visit(self)?; + self.conditionally(|visitor| right.visit(visitor).map(|_| ()))?; + + TreeNodeRecursion::Jump + } + + // In case of `Case` the optional base expression and the first when + // expressions are surely executed, but we account subexpressions as + // conditional in the others. + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + expr.iter().try_for_each(|e| e.visit(self).map(|_| ()))?; + when_then_expr.iter().take(1).try_for_each(|(when, then)| { + when.visit(self)?; + self.conditionally(|visitor| then.visit(visitor).map(|_| ())) + })?; + self.conditionally(|visitor| { + when_then_expr.iter().skip(1).try_for_each(|(when, then)| { + when.visit(visitor)?; + then.visit(visitor).map(|_| ()) + })?; + else_expr + .iter() + .try_for_each(|e| e.visit(visitor).map(|_| ())) + })?; + + TreeNodeRecursion::Jump + } + + // In case of non-short-circuit expressions continue the traversal. 
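+            // (e.g. arithmetic, comparisons, and scalar functions that do not
+            // short-circuit)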
+ _ => TreeNodeRecursion::Continue, + }) } fn f_up(&mut self, expr: &'n Expr) -> Result { - let (down_index, is_tree, sub_expr_id, sub_expr_is_valid) = self.pop_enter_mark(); + let (down_index, sub_expr_id, sub_expr_is_valid) = self.pop_enter_mark(); - let (expr_id, is_valid) = if is_tree { - ( - Identifier::new(expr, true, self.random_state), - !expr.is_volatile()?, - ) - } else { - ( - Identifier::new(expr, false, self.random_state).combine(sub_expr_id), - !expr.is_volatile_node() && sub_expr_is_valid, - ) - }; + let expr_id = Identifier::new(expr, self.random_state).combine(sub_expr_id); + let is_valid = !expr.is_volatile_node() && sub_expr_is_valid; self.id_array[down_index].0 = self.up_index; if is_valid && !self.expr_mask.ignores(expr) { self.id_array[down_index].1 = Some(expr_id); - let count = self.expr_stats.entry(expr_id).or_insert(0); - *count += 1; - if *count > 1 { + let (count, conditional_count) = + self.expr_stats.entry(expr_id).or_insert((0, 0)); + if self.conditional { + *conditional_count += 1; + } else { + *count += 1; + } + if *count > 1 || (*count == 1 && *conditional_count > 0) { self.found_common = true; } } @@ -1039,51 +1094,40 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_> { self.alias_counter += 1; } - // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to generate the - // `id_array`, which records the expr's identifier used to rewrite expr. So if we - // skip an expr in `ExprIdentifierVisitor`, we should skip it here, too. - let is_tree = expr.short_circuits(); - let tnr = if is_tree { - TreeNodeRecursion::Jump - } else { - TreeNodeRecursion::Continue - }; - let (up_index, expr_id) = self.id_array[self.down_index]; self.down_index += 1; - // skip `Expr`s without identifier (empty identifier). - let Some(expr_id) = expr_id else { - return Ok(Transformed::new(expr, false, tnr)); - }; - - let count = self.expr_stats.get(&expr_id).unwrap(); - if *count > 1 { - // step index to skip all sub-node (which has smaller series number). - while self.down_index < self.id_array.len() - && self.id_array[self.down_index].0 < up_index - { - self.down_index += 1; - } + // Handle `Expr`s with identifiers only + if let Some(expr_id) = expr_id { + let (count, conditional_count) = self.expr_stats.get(&expr_id).unwrap(); + if *count > 1 || *count == 1 && *conditional_count > 0 { + // step index to skip all sub-node (which has smaller series number). 
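+                    // The whole subtree is replaced by a single column reference
+                    // to the extracted expression, so its children never need to
+                    // be rewritten on their own.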
+ while self.down_index < self.id_array.len() + && self.id_array[self.down_index].0 < up_index + { + self.down_index += 1; + } - let expr_name = expr.display_name()?; - let (_, expr_alias) = self.common_exprs.entry(expr_id).or_insert_with(|| { - let expr_alias = self.alias_generator.next(CSE_PREFIX); - (expr, expr_alias) - }); + let expr_name = expr.display_name()?; + let (_, expr_alias) = + self.common_exprs.entry(expr_id).or_insert_with(|| { + let expr_alias = self.alias_generator.next(CSE_PREFIX); + (expr, expr_alias) + }); - // alias the expressions without an `Alias` ancestor node - let rewritten = if self.alias_counter > 0 { - col(expr_alias.clone()) - } else { - self.alias_counter += 1; - col(expr_alias.clone()).alias(expr_name) - }; + // alias the expressions without an `Alias` ancestor node + let rewritten = if self.alias_counter > 0 { + col(expr_alias.clone()) + } else { + self.alias_counter += 1; + col(expr_alias.clone()).alias(expr_name) + }; - Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump)) - } else { - Ok(Transformed::new(expr, false, tnr)) + return Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump)); + } } + + Ok(Transformed::no(expr)) } fn f_up(&mut self, expr: Expr) -> Result> { @@ -1685,7 +1729,7 @@ mod test { .unwrap(); let rule = CommonSubexprEliminate::new(); let optimized_plan = rule.rewrite(plan, &OptimizerContext::new()).unwrap(); - assert!(!optimized_plan.transformed); + assert!(optimized_plan.transformed); let optimized_plan = optimized_plan.data; let schema = optimized_plan.schema(); @@ -1837,22 +1881,29 @@ mod test { let table_scan = test_table_scan()?; let extracted_short_circuit = col("a").eq(lit(0)).or(col("b").eq(lit(0))); - let not_extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); + let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); let not_extracted_short_circuit_leg_2 = (col("a") - col("b")).eq(lit(0)); + let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0)); let plan = LogicalPlanBuilder::from(table_scan.clone()) .project(vec![ extracted_short_circuit.clone().alias("c1"), extracted_short_circuit.alias("c2"), - not_extracted_short_circuit_leg_1.clone().alias("c3"), - not_extracted_short_circuit_leg_2.clone().alias("c4"), - not_extracted_short_circuit_leg_1 - .or(not_extracted_short_circuit_leg_2) + extracted_short_circuit_leg_1 + .clone() + .or(not_extracted_short_circuit_leg_2.clone()) + .alias("c3"), + extracted_short_circuit_leg_1 + .and(not_extracted_short_circuit_leg_2) + .alias("c4"), + extracted_short_circuit_leg_3 + .clone() + .or(extracted_short_circuit_leg_3.clone()) .alias("c5"), ])? 
.build()?; - let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, test.a + test.b = Int32(0) AS c3, test.a - test.b = Int32(0) AS c4, test.a + test.b = Int32(0) OR test.a - test.b = Int32(0) AS c5\ - \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS __common_expr_1, test.a, test.b, test.c\ + let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 OR test.a - test.b = Int32(0) AS c3, __common_expr_2 AND test.a - test.b = Int32(0) AS c4, __common_expr_3 OR __common_expr_3 AS c5\ + \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS __common_expr_1, test.a + test.b = Int32(0) AS __common_expr_2, test.a * test.b = Int32(0) AS __common_expr_3, test.a, test.b, test.c\ \n TableScan: test"; assert_optimized_plan_eq(expected, plan, None); @@ -1888,10 +1939,12 @@ mod test { let table_scan = test_table_scan()?; let rand = rand_func().call(vec![]); - let not_extracted_volatile_short_circuit_2 = - rand.clone().eq(lit(0)).or(col("b").eq(lit(0))); + let extracted_short_circuit_leg_1 = col("a").eq(lit(0)); let not_extracted_volatile_short_circuit_1 = - col("a").eq(lit(0)).or(rand.eq(lit(0))); + extracted_short_circuit_leg_1.or(rand.clone().eq(lit(0))); + let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0)); + let not_extracted_volatile_short_circuit_2 = + rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2); let plan = LogicalPlanBuilder::from(table_scan.clone()) .project(vec![ not_extracted_volatile_short_circuit_1.clone().alias("c1"), @@ -1901,10 +1954,11 @@ mod test { ])? .build()?; - let expected = "Projection: test.a = Int32(0) OR random() = Int32(0) AS c1, test.a = Int32(0) OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\ - \n TableScan: test"; + let expected = "Projection: __common_expr_1 OR random() = Int32(0) AS c1, __common_expr_1 OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\ + \n Projection: test.a = Int32(0) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; - assert_non_optimized_plan_eq(expected, plan, None); + assert_optimized_plan_eq(expected, plan, None); Ok(()) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index cae2a7b2cad2f..58c1ae297b02e 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -19,7 +19,7 @@ mod required_indices; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use crate::optimizer::ApplyOrder; @@ -42,7 +42,6 @@ use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, }; use datafusion_expr::logical_plan::tree_node::unwrap_arc; -use hashbrown::HashMap; /// Optimizer rule to prune unnecessary columns from intermediate schemas /// inside the [`LogicalPlan`]. This rule: @@ -472,11 +471,8 @@ fn merge_consecutive_projections(proj: Projection) -> Result::new(); - for columns in expr.iter().map(|expr| expr.column_refs()) { - for col in columns.into_iter() { - *column_referral_map.entry(col).or_default() += 1; - } - } + expr.iter() + .for_each(|expr| expr.add_column_ref_counts(&mut column_referral_map)); // If an expression is non-trivial and appears more than once, do not merge // them as consecutive projections will benefit from a compute-once approach. 
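The cse.slt changes that follow exercise the new counting scheme end to end. As a standalone illustration (plain Rust, not a DataFusion API), the extraction rule that the `(count, conditional_count)` statistics encode can be sketched as follows; the function name `should_extract` is illustrative only.

/// Sketch of the rule applied in `ExprIdentifierVisitor::f_up` above: an
/// expression becomes a common subexpression once it is surely evaluated at
/// least once and evaluated more than once in total.
fn should_extract(surely: usize, conditional: usize) -> bool {
    surely > 1 || (surely == 1 && conditional > 0)
}

fn main() {
    // `a = 1` in `(a = 1 OR random() = 0) AND a = 1`:
    // surely once (left leg of the OR), conditionally once (right leg of the AND).
    assert!(should_extract(1, 1));
    // `a = 1` in `(random() = 0 OR a = 1) AND a = 1`: only conditional occurrences,
    // so extracting it could evaluate it when the original query would not.
    assert!(!should_extract(0, 2));
    // Plain repetition outside any short-circuit operator is extracted as before.
    assert!(should_extract(2, 0));
}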
diff --git a/datafusion/sqllogictest/test_files/cse.slt b/datafusion/sqllogictest/test_files/cse.slt index 3579c1c1635cb..19b47fa50e410 100644 --- a/datafusion/sqllogictest/test_files/cse.slt +++ b/datafusion/sqllogictest/test_files/cse.slt @@ -93,15 +93,16 @@ FROM t1 ---- logical_plan 01)Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 AS c3, __common_expr_2 AS c4, __common_expr_3 AS c5, __common_expr_3 AS c6 -02)--Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS __common_expr_1, t1.a = Float64(0) OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN t1.a = Float64(0) THEN Int64(0) ELSE Int64(1) END AS __common_expr_3 -03)----TableScan: t1 projection=[a, b] +02)--Projection: __common_expr_4 AND t1.b = Float64(0) AS __common_expr_1, __common_expr_4 OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN __common_expr_4 THEN Int64(0) ELSE Int64(1) END AS __common_expr_3 +03)----Projection: t1.a = Float64(0) AS __common_expr_4, t1.b +04)------TableScan: t1 projection=[a, b] physical_plan 01)ProjectionExec: expr=[__common_expr_1@0 as c1, __common_expr_1@0 as c2, __common_expr_2@1 as c3, __common_expr_2@1 as c4, __common_expr_3@2 as c5, __common_expr_3@2 as c6] -02)--ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as __common_expr_1, a@0 = 0 OR b@1 = 0 as __common_expr_2, CASE WHEN a@0 = 0 THEN 0 ELSE 1 END as __common_expr_3] -03)----MemoryExec: partitions=1, partition_sizes=[0] +02)--ProjectionExec: expr=[__common_expr_4@0 AND b@1 = 0 as __common_expr_1, __common_expr_4@0 OR b@1 = 0 as __common_expr_2, CASE WHEN __common_expr_4@0 THEN 0 ELSE 1 END as __common_expr_3] +03)----ProjectionExec: expr=[a@0 = 0 as __common_expr_4, b@1 as b] +04)------MemoryExec: partitions=1, partition_sizes=[0] # Common children of short-circuit expression -# TODO: consider surely executed children of "short circuited"s for CSE. i.e. 
`a = 0`, `a = 2`, `a = 4` should be extracted query TT EXPLAIN SELECT a = 0 AND b = 0 AS c1, @@ -121,14 +122,15 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS c1, t1.a = Float64(0) AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b = Float64(4) AS c5, t1.a = Float64(2) OR t1.b = Float64(5) AS c6, t1.b = Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(2) END AS c10, CASE WHEN t1.b = Float64(8) THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 -02)--TableScan: t1 projection=[a, b] +01)Projection: __common_expr_1 AND t1.b = Float64(0) AS c1, __common_expr_1 AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b = Float64(4) AS c5, __common_expr_2 OR t1.b = Float64(5) AS c6, t1.b = Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE WHEN __common_expr_3 THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN __common_expr_3 THEN Int64(0) ELSE Int64(2) END AS c10, CASE WHEN t1.b = Float64(8) THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 +02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b +03)----TableScan: t1 projection=[a, b] physical_plan -01)ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as c1, a@0 = 0 AND b@1 = 1 as c2, b@1 = 2 AND a@0 = 1 as c3, b@1 = 3 AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 as c5, a@0 = 2 OR b@1 = 5 as c6, b@1 = 6 OR a@0 = 3 as c7, b@1 = 7 OR a@0 = 3 as c8, CASE WHEN a@0 = 4 THEN 0 ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN 0 ELSE 2 END as c10, CASE WHEN b@1 = 8 THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9 THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 THEN 0 ELSE a@0 + 2 END as c13, CASE WHEN b@1 = 11 THEN 0 ELSE a@0 + 2 END as c14] -02)--MemoryExec: partitions=1, partition_sizes=[0] +01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = 0 as c1, __common_expr_1@0 AND b@4 = 1 as c2, b@4 = 2 AND a@3 = 1 as c3, b@4 = 3 AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 as c5, __common_expr_2@1 OR b@4 = 5 as c6, b@4 = 6 OR a@3 = 3 as c7, b@4 = 7 OR a@3 = 3 as c8, CASE WHEN __common_expr_3@2 THEN 0 ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN 0 ELSE 2 END as c10, CASE WHEN b@4 = 8 THEN a@3 + 1 ELSE 0 END as c11, CASE WHEN b@4 = 9 THEN a@3 + 1 ELSE 0 END as c12, CASE WHEN b@4 = 10 THEN 0 ELSE a@3 + 2 END as c13, CASE WHEN b@4 = 11 THEN 0 ELSE a@3 + 2 END as c14] +02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b] +03)----MemoryExec: partitions=1, partition_sizes=[0] # Common children of volatile, short-circuit expression -# TODO: consider surely executed children of "short circuited"s for CSE. i.e. 
`a = 0`, `a = 2`, `a = 4` should be extracted query TT EXPLAIN SELECT a = 0 AND b = random() AS c1, @@ -148,11 +150,13 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: t1.a = Float64(0) AND t1.b = random() AS c1, t1.a = Float64(0) AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b = Float64(4) + random() AS c5, t1.a = Float64(2) OR t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE WHEN t1.a = Float64(4) THEN random() ELSE Float64(1) END AS c9, CASE WHEN t1.a = Float64(4) THEN random() ELSE Float64(2) END AS c10, CASE WHEN t1.b = Float64(8) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 -02)--TableScan: t1 projection=[a, b] +01)Projection: __common_expr_1 AND t1.b = random() AS c1, __common_expr_1 AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b = Float64(4) + random() AS c5, __common_expr_2 OR t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE WHEN __common_expr_3 THEN random() ELSE Float64(1) END AS c9, CASE WHEN __common_expr_3 THEN random() ELSE Float64(2) END AS c10, CASE WHEN t1.b = Float64(8) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 +02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b +03)----TableScan: t1 projection=[a, b] physical_plan -01)ProjectionExec: expr=[a@0 = 0 AND b@1 = random() as c1, a@0 = 0 AND b@1 = 1 + random() as c2, b@1 = 2 + random() AND a@0 = 1 as c3, b@1 = 3 + random() AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 + random() as c5, a@0 = 2 OR b@1 = 5 + random() as c6, b@1 = 6 + random() OR a@0 = 3 as c7, b@1 = 7 + random() OR a@0 = 3 as c8, CASE WHEN a@0 = 4 THEN random() ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN random() ELSE 2 END as c10, CASE WHEN b@1 = 8 + random() THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9 + random() THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 + random() THEN 0 ELSE a@0 + 2 END as c13, CASE WHEN b@1 = 11 + random() THEN 0 ELSE a@0 + 2 END as c14] -02)--MemoryExec: partitions=1, partition_sizes=[0] +01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = random() as c1, __common_expr_1@0 AND b@4 = 1 + random() as c2, b@4 = 2 + random() AND a@3 = 1 as c3, b@4 = 3 + random() AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 + random() as c5, __common_expr_2@1 OR b@4 = 5 + random() as c6, b@4 = 6 + random() OR a@3 = 3 as c7, b@4 = 7 + random() OR a@3 = 3 as c8, CASE WHEN __common_expr_3@2 THEN random() ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN random() ELSE 2 END as c10, CASE WHEN b@4 = 8 + random() THEN a@3 
+ 1 ELSE 0 END as c11, CASE WHEN b@4 = 9 + random() THEN a@3 + 1 ELSE 0 END as c12, CASE WHEN b@4 = 10 + random() THEN 0 ELSE a@3 + 2 END as c13, CASE WHEN b@4 = 11 + random() THEN 0 ELSE a@3 + 2 END as c14] +02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b] +03)----MemoryExec: partitions=1, partition_sizes=[0] # Common volatile children of short-circuit expression query TT @@ -171,3 +175,59 @@ logical_plan physical_plan 01)ProjectionExec: expr=[a@0 = random() AND b@1 = 0 as c1, a@0 = random() AND b@1 = 1 as c2, a@0 = 2 + random() OR b@1 = 4 as c3, a@0 = 2 + random() OR b@1 = 5 as c4, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 1 END as c5, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 2 END as c6] 02)--MemoryExec: partitions=1, partition_sizes=[0] + +# Surely only once but also conditionally evaluated expressions +query TT +EXPLAIN SELECT + (a = 1 OR random() = 0) AND a = 1 AS c1, + (a = 2 AND random() = 0) OR a = 2 AS c2, + CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3, + CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4, + CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5, + CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6 +FROM t1 +---- +logical_plan +01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6 +02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6 +03)----TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6] +02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6] +03)----MemoryExec: partitions=1, partition_sizes=[0] + +# Surely only once but also conditionally evaluated subexpressions +query TT +EXPLAIN SELECT + (a = 1 OR random() = 0) AND (a = 1 OR random() = 1) AS c1, + (a = 2 AND random() = 0) OR (a = 2 AND random() = 1) AS c2, + CASE WHEN a + 3 = 0 THEN a + 3 + random() ELSE 0 END AS c3, + CASE WHEN a + 4 = 0 THEN 0 ELSE a + 4 + random() END AS c4 +FROM t1 +---- +logical_plan +01)Projection: (__common_expr_1 OR random() = Float64(0)) AND (__common_expr_1 OR random() = Float64(1)) AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AND random() = Float64(1) AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN 
__common_expr_3 + random() ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Float64(0) ELSE __common_expr_4 + random() END AS c4 +02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4 +03)----TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND (__common_expr_1@0 OR random() = 1) as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 AND random() = 1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 + random() ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 ELSE __common_expr_4@3 + random() END as c4] +02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4] +03)----MemoryExec: partitions=1, partition_sizes=[0] + +# Only conditionally evaluated expressions +query TT +EXPLAIN SELECT + (random() = 0 OR a = 1) AND a = 1 AS c1, + (random() = 0 AND a = 2) OR a = 2 AS c2, + CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3, + CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4, + CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5, + CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END AS c6 +FROM t1 +---- +logical_plan +01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6 +02)--TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() = 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6] +02)--MemoryExec: partitions=1, partition_sizes=[0] diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f9baf8db69d5b..95f67245a981e 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1504,21 +1504,25 @@ query TT EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t; ---- logical_plan -01)Projection: t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x -02)--TableScan: t projection=[x, y] +01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND 
Int64(1) / t.y < Int64(1) / t.x +02)--Projection: t.y > Int32(0) AS __common_expr_1, t.x, t.y +03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@0 > 0 AND y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x] -02)--MemoryExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@1 > 0 AND __common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x] +02)--ProjectionExec: expr=[y@1 > 0 as __common_expr_1, x@0 as x, y@1 as y] +03)----MemoryExec: partitions=1, partition_sizes=[1] query TT EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t; ---- logical_plan -01)Projection: t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x -02)--TableScan: t projection=[x, y] +01)Projection: __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x +02)--Projection: t.y = Int32(0) AS __common_expr_1, t.x, t.y +03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] -02)--MemoryExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@1 = 0 OR __common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] +02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y] +03)----MemoryExec: partitions=1, partition_sizes=[1] # due to the reason describe in https://github.com/apache/datafusion/issues/8927, # the following queries will fail diff --git a/datafusion/sqllogictest/test_files/tpch/q14.slt.part b/datafusion/sqllogictest/test_files/tpch/q14.slt.part index e56e463a617d7..3743c201ff2e5 100644 --- a/datafusion/sqllogictest/test_files/tpch/q14.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q14.slt.part @@ -32,9 +32,9 @@ where and l_shipdate < date '1995-10-01'; ---- logical_plan -01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue -02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * 
(Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] -03)----Projection: lineitem.l_extendedprice, lineitem.l_discount, part.p_type +01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue +02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +03)----Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, part.p_type 04)------Inner Join: lineitem.l_partkey = part.p_partkey 05)--------Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount 06)----------Filter: lineitem.l_shipdate >= Date32("1995-09-01") AND lineitem.l_shipdate < Date32("1995-10-01") @@ -44,19 +44,20 @@ physical_plan 01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] 02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_type@4] -07)------------CoalesceBatchesExec: target_batch_size=8192 -08)--------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 -09)----------------ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 < 1995-10-01 -12)----------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], has_header=false -13)------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -15)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -16)------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_type], has_header=false +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +05)--------ProjectionExec: expr=[l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as __common_expr_1, p_type@2 as p_type] +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_type@4] +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 +10)------------------ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +11)--------------------CoalesceBatchesExec: target_batch_size=8192 +12)----------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 < 1995-10-01 +13)------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], has_header=false +14)--------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +16)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +17)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_type], has_header=false From 1dfac86a89750193491cf3e04917e37b92c64ffa Mon Sep 17 00:00:00 2001 From: wiedld Date: Fri, 12 Jul 2024 04:04:42 -0700 Subject: [PATCH 034/357] fix(11397): surface proper errors in ParquetSink (#11399) * fix(11397): do not surface errors for closed channels, and instead let the task join errors be surfaced * fix(11397): terminate early on channel send failure --- .../src/datasource/file_format/parquet.rs | 32 +++++++++---------- datafusion/core/tests/memory_limit/mod.rs | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 694c949285374..6271d8af37862 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -893,12 +893,12 @@ async fn send_arrays_to_col_writers( let mut next_channel = 0; for (array, field) in rb.columns().iter().zip(schema.fields()) { for c in compute_leaves(field, array)? { - col_array_channels[next_channel] - .send(c) - .await - .map_err(|_| { - DataFusionError::Internal("Unable to send array to writer!".into()) - })?; + // Do not surface error from closed channel (means something + // else hit an error, and the plan is shutting down). 
+ if col_array_channels[next_channel].send(c).await.is_err() { + return Ok(()); + } + next_channel += 1; } } @@ -984,11 +984,11 @@ fn spawn_parquet_parallel_serialization_task( &pool, ); - serialize_tx.send(finalize_rg_task).await.map_err(|_| { - DataFusionError::Internal( - "Unable to send closed RG to concat task!".into(), - ) - })?; + // Do not surface error from closed channel (means something + // else hit an error, and the plan is shutting down). + if serialize_tx.send(finalize_rg_task).await.is_err() { + return Ok(()); + } current_rg_rows = 0; rb = rb.slice(rows_left, rb.num_rows() - rows_left); @@ -1013,11 +1013,11 @@ fn spawn_parquet_parallel_serialization_task( &pool, ); - serialize_tx.send(finalize_rg_task).await.map_err(|_| { - DataFusionError::Internal( - "Unable to send closed RG to concat task!".into(), - ) - })?; + // Do not surface error from closed channel (means something + // else hit an error, and the plan is shutting down). + if serialize_tx.send(finalize_rg_task).await.is_err() { + return Ok(()); + } } Ok(()) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index f7402357d1c76..7ef24609e238d 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -340,8 +340,8 @@ async fn oom_parquet_sink() { path.to_string_lossy() )) .with_expected_errors(vec![ - // TODO: update error handling in ParquetSink - "Unable to send array to writer!", + "Failed to allocate additional", + "for ParquetSink(ArrowColumnWriter)", ]) .with_memory_limit(200_000) .run() From 13ddbaf2f7220c26f443d097697d1380e63f6206 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 10:53:58 -0400 Subject: [PATCH 035/357] Minor: Add note about SQLLancer fuzz testing to docs (#11430) * Minor: Add note about SQLLancer fuzz testing to docs * prettier --- docs/source/contributor-guide/testing.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/testing.md b/docs/source/contributor-guide/testing.md index 018cc6233c461..0f4461ab2c2c6 100644 --- a/docs/source/contributor-guide/testing.md +++ b/docs/source/contributor-guide/testing.md @@ -39,7 +39,7 @@ DataFusion's SQL implementation is tested using [sqllogictest](https://github.co Like similar systems such as [DuckDB](https://duckdb.org/dev/testing), DataFusion has chosen to trade off a slightly higher barrier to contribution for longer term maintainability. -### Rust Integration Tests +## Rust Integration Tests There are several tests of the public interface of the DataFusion library in the [tests](https://github.com/apache/datafusion/tree/main/datafusion/core/tests) directory. @@ -49,6 +49,18 @@ You can run these tests individually using `cargo` as normal command such as cargo test -p datafusion --test parquet_exec ``` +## SQL "Fuzz" testing + +DataFusion uses the [SQLancer] for "fuzz" testing: it generates random SQL +queries and execute them against DataFusion to find bugs. + +The code is in the [datafusion-sqllancer] repository, and we welcome further +contributions. Kudos to [@2010YOUY01] for the initial implementation. 
+ +[sqlancer]: https://github.com/sqlancer/sqlancer +[datafusion-sqllancer]: https://github.com/datafusion-contrib/datafusion-sqllancer +[@2010youy01]: https://github.com/2010YOUY01 + ## Documentation Examples We use Rust [doctest] to verify examples from the documentation are correct and From c769a70dc1c746460b4c1369d4e42c4a78da9571 Mon Sep 17 00:00:00 2001 From: tmi Date: Fri, 12 Jul 2024 17:52:24 +0200 Subject: [PATCH 036/357] Trivial: use arrow csv writer's timestamp_tz_format (#11407) --- datafusion/common/src/file_options/csv_writer.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index 5792cfdba9e0c..ae069079a68f8 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -63,6 +63,9 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions { if let Some(v) = &value.timestamp_format { builder = builder.with_timestamp_format(v.into()) } + if let Some(v) = &value.timestamp_tz_format { + builder = builder.with_timestamp_tz_format(v.into()) + } if let Some(v) = &value.time_format { builder = builder.with_time_format(v.into()) } From a2a6458e420209c7125b08966c5726b5fd104195 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 11:53:03 -0400 Subject: [PATCH 037/357] Minor: improve documentation for sql unparsing (#11395) --- datafusion/sql/src/lib.rs | 6 ++- datafusion/sql/src/unparser/expr.rs | 29 +++++++++---- datafusion/sql/src/unparser/mod.rs | 64 +++++++++++++++++++++++++++-- datafusion/sql/src/unparser/plan.rs | 24 ++++++++--- 4 files changed, 105 insertions(+), 18 deletions(-) diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index eb5fec7a3c8bb..f53cab5df8482 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -17,7 +17,7 @@ // Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] -//! This module provides: +//! This crate provides: //! //! 1. A SQL parser, [`DFParser`], that translates SQL query text into //! an abstract syntax tree (AST), [`Statement`]. @@ -25,10 +25,14 @@ //! 2. A SQL query planner [`SqlToRel`] that creates [`LogicalPlan`]s //! from [`Statement`]s. //! +//! 3. A SQL [`unparser`] that converts [`Expr`]s and [`LogicalPlan`]s +//! into SQL query text. +//! //! [`DFParser`]: parser::DFParser //! [`Statement`]: parser::Statement //! [`SqlToRel`]: planner::SqlToRel //! [`LogicalPlan`]: datafusion_expr::logical_plan::LogicalPlan +//! [`Expr`]: datafusion_expr::expr::Expr mod cte; mod expr; diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index e0d05c400cb09..eb149c819c8b0 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -72,21 +72,34 @@ impl Display for Unparsed { } } -/// Convert a DataFusion [`Expr`] to `sqlparser::ast::Expr` +/// Convert a DataFusion [`Expr`] to [`ast::Expr`] /// -/// This function is the opposite of `SqlToRel::sql_to_expr` and can -/// be used to, among other things, convert [`Expr`]s to strings. -/// Throws an error if [`Expr`] can not be represented by an `sqlparser::ast::Expr` +/// This function is the opposite of [`SqlToRel::sql_to_expr`] and can be used +/// to, among other things, convert [`Expr`]s to SQL strings. Such strings could +/// be used to pass filters or other expressions to another SQL engine. 
+/// +/// # Errors +/// +/// Throws an error if [`Expr`] can not be represented by an [`ast::Expr`] +/// +/// # See Also +/// +/// * [`Unparser`] for more control over the conversion to SQL +/// * [`plan_to_sql`] for converting a [`LogicalPlan`] to SQL /// /// # Example /// ``` /// use datafusion_expr::{col, lit}; /// use datafusion_sql::unparser::expr_to_sql; -/// let expr = col("a").gt(lit(4)); -/// let sql = expr_to_sql(&expr).unwrap(); -/// -/// assert_eq!(format!("{}", sql), "(a > 4)") +/// let expr = col("a").gt(lit(4)); // form an expression `a > 4` +/// let sql = expr_to_sql(&expr).unwrap(); // convert to ast::Expr +/// // use the Display impl to convert to SQL text +/// assert_eq!(sql.to_string(), "(a > 4)") /// ``` +/// +/// [`SqlToRel::sql_to_expr`]: crate::planner::SqlToRel::sql_to_expr +/// [`plan_to_sql`]: crate::unparser::plan_to_sql +/// [`LogicalPlan`]: datafusion_expr::logical_plan::LogicalPlan pub fn expr_to_sql(expr: &Expr) -> Result { let unparser = Unparser::default(); unparser.expr_to_sql(expr) diff --git a/datafusion/sql/src/unparser/mod.rs b/datafusion/sql/src/unparser/mod.rs index e5ffbc8a212ab..83ae64ba238b0 100644 --- a/datafusion/sql/src/unparser/mod.rs +++ b/datafusion/sql/src/unparser/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! [`Unparser`] for converting `Expr` to SQL text + mod ast; mod expr; mod plan; @@ -27,6 +29,29 @@ pub use plan::plan_to_sql; use self::dialect::{DefaultDialect, Dialect}; pub mod dialect; +/// Convert a DataFusion [`Expr`] to [`sqlparser::ast::Expr`] +/// +/// See [`expr_to_sql`] for background. `Unparser` allows greater control of +/// the conversion, but with a more complicated API. +/// +/// To get more human-readable output, see [`Self::with_pretty`] +/// +/// # Example +/// ``` +/// use datafusion_expr::{col, lit}; +/// use datafusion_sql::unparser::Unparser; +/// let expr = col("a").gt(lit(4)); // form an expression `a > 4` +/// let unparser = Unparser::default(); +/// let sql = unparser.expr_to_sql(&expr).unwrap();// convert to AST +/// // use the Display impl to convert to SQL text +/// assert_eq!(sql.to_string(), "(a > 4)"); +/// // now convert to pretty sql +/// let unparser = unparser.with_pretty(true); +/// let sql = unparser.expr_to_sql(&expr).unwrap(); +/// assert_eq!(sql.to_string(), "a > 4"); // note lack of parenthesis +/// ``` +/// +/// [`Expr`]: datafusion_expr::Expr pub struct Unparser<'a> { dialect: &'a dyn Dialect, pretty: bool, @@ -40,9 +65,42 @@ impl<'a> Unparser<'a> { } } - /// Allow unparser to remove parenthesis according to the precedence rules of DataFusion. - /// This might make it invalid SQL for other SQL query engines with different precedence - /// rules, even if its valid for DataFusion. + /// Create pretty SQL output, better suited for human consumption + /// + /// See example on the struct level documentation + /// + /// # Pretty Output + /// + /// By default, `Unparser` generates SQL text that will parse back to the + /// same parsed [`Expr`], which is useful for creating machine readable + /// expressions to send to other systems. However, the resulting expressions are + /// not always nice to read for humans. + /// + /// For example + /// + /// ```sql + /// ((a + 4) > 5) + /// ``` + /// + /// This method removes parenthesis using to the precedence rules of + /// DataFusion. 
If the output is reparsed, the resulting [`Expr`] produces + /// same value as the original in DataFusion, but with a potentially + /// different order of operations. + /// + /// Note that this setting may create invalid SQL for other SQL query + /// engines with different precedence rules + /// + /// # Example + /// ``` + /// use datafusion_expr::{col, lit}; + /// use datafusion_sql::unparser::Unparser; + /// let expr = col("a").gt(lit(4)).and(col("b").lt(lit(5))); // form an expression `a > 4 AND b < 5` + /// let unparser = Unparser::default().with_pretty(true); + /// let sql = unparser.expr_to_sql(&expr).unwrap(); + /// assert_eq!(sql.to_string(), "a > 4 AND b < 5"); // note lack of parenthesis + /// ``` + /// + /// [`Expr`]: datafusion_expr::Expr pub fn with_pretty(mut self, pretty: bool) -> Self { self.pretty = pretty; self diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 15137403c582d..41a8c968841b3 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -33,10 +33,18 @@ use super::{ Unparser, }; -/// Convert a DataFusion [`LogicalPlan`] to `sqlparser::ast::Statement` +/// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] /// -/// This function is the opposite of `SqlToRel::sql_statement_to_plan` and can -/// be used to, among other things, convert `LogicalPlan`s to strings. +/// This function is the opposite of [`SqlToRel::sql_statement_to_plan`] and can +/// be used to, among other things, to convert `LogicalPlan`s to SQL strings. +/// +/// # Errors +/// +/// This function returns an error if the plan cannot be converted to SQL. +/// +/// # See Also +/// +/// * [`expr_to_sql`] for converting [`Expr`], a single expression to SQL /// /// # Example /// ``` @@ -47,16 +55,20 @@ use super::{ /// Field::new("id", DataType::Utf8, false), /// Field::new("value", DataType::Utf8, false), /// ]); +/// // Scan 'table' and select columns 'id' and 'value' /// let plan = table_scan(Some("table"), &schema, None) /// .unwrap() /// .project(vec![col("id"), col("value")]) /// .unwrap() /// .build() /// .unwrap(); -/// let sql = plan_to_sql(&plan).unwrap(); -/// -/// assert_eq!(format!("{}", sql), "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"") +/// let sql = plan_to_sql(&plan).unwrap(); // convert to AST +/// // use the Display impl to convert to SQL text +/// assert_eq!(sql.to_string(), "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"") /// ``` +/// +/// [`SqlToRel::sql_statement_to_plan`]: crate::planner::SqlToRel::sql_statement_to_plan +/// [`expr_to_sql`]: crate::unparser::expr_to_sql pub fn plan_to_sql(plan: &LogicalPlan) -> Result { let unparser = Unparser::default(); unparser.plan_to_sql(plan) From dc21a6c25893e7906da588debf18a8e5918b3b32 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 11:53:44 -0400 Subject: [PATCH 038/357] Minor: Consolidate specificataion doc sections (#11427) --- docs/source/contributor-guide/index.md | 16 ---------------- .../contributor-guide/specification/index.rst | 10 ++++++++++ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index 891277f647570..ad49b614c3341 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -134,19 +134,3 @@ The good thing about open code and open development is that any issues in one ch Pull requests will be marked with a `stale` label after 60 days of inactivity and then closed 7 
days after that. Commenting on the PR will remove the `stale` label. - -## Specifications - -We formalize some DataFusion semantics and behaviors through specification -documents. These specifications are useful to be used as references to help -resolve ambiguities during development or code reviews. - -You are also welcome to propose changes to existing specifications or create -new specifications as you see fit. - -Here is the list current active specifications: - -- [Output field name semantic](https://datafusion.apache.org/contributor-guide/specification/output-field-name-semantic.html) -- [Invariants](https://datafusion.apache.org/contributor-guide/specification/invariants.html) - -All specifications are stored in the `docs/source/specification` folder. diff --git a/docs/source/contributor-guide/specification/index.rst b/docs/source/contributor-guide/specification/index.rst index bcd5a895c4d24..a34f0b19e4dea 100644 --- a/docs/source/contributor-guide/specification/index.rst +++ b/docs/source/contributor-guide/specification/index.rst @@ -18,6 +18,16 @@ Specifications ============== +We formalize some DataFusion semantics and behaviors through specification +documents. These specifications are useful to be used as references to help +resolve ambiguities during development or code reviews. + +You are also welcome to propose changes to existing specifications or create +new specifications as you see fit. All specifications are stored in the +`docs/source/specification` folder. Here is the list current active +specifications: + + .. toctree:: :maxdepth: 1 From b075ac471e6d27dfe40b6586a72070a9ec4751a9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 15:27:16 -0400 Subject: [PATCH 039/357] Minor: consolidate doc roadmap pages (#11426) --- .../contributor-guide/quarterly_roadmap.md | 96 ------------------- docs/source/contributor-guide/roadmap.md | 81 ++++++++++++++++ docs/source/index.rst | 1 - 3 files changed, 81 insertions(+), 97 deletions(-) delete mode 100644 docs/source/contributor-guide/quarterly_roadmap.md diff --git a/docs/source/contributor-guide/quarterly_roadmap.md b/docs/source/contributor-guide/quarterly_roadmap.md deleted file mode 100644 index ee82617225aa6..0000000000000 --- a/docs/source/contributor-guide/quarterly_roadmap.md +++ /dev/null @@ -1,96 +0,0 @@ - - -# Quarterly Roadmap - -A quarterly roadmap will be published to give the DataFusion community visibility into the priorities of the projects contributors. This roadmap is not binding. 
- -## 2023 Q4 - -- Improve data output (`COPY`, `INSERT` and DataFrame) output capability [#6569](https://github.com/apache/datafusion/issues/6569) -- Implementation of `ARRAY` types and related functions [#6980](https://github.com/apache/datafusion/issues/6980) -- Write an industrial paper about DataFusion for SIGMOD [#6782](https://github.com/apache/datafusion/issues/6782) - -## 2022 Q2 - -### DataFusion Core - -- IO Improvements - - Reading, registering, and writing more file formats from both DataFrame API and SQL - - Additional options for IO including partitioning and metadata support -- Work Scheduling - - Improve predictability, observability and performance of IO and CPU-bound work - - Develop a more explicit story for managing parallelism during plan execution -- Memory Management - - Add more operators for memory limited execution -- Performance - - Incorporate row-format into operators such as aggregate - - Add row-format benchmarks - - Explore JIT-compiling complex expressions - - Explore LLVM for JIT, with inline Rust functions as the primary goal - - Improve performance of Sort and Merge using Row Format / JIT expressions -- Documentation - - General improvements to DataFusion website - - Publish design documents -- Streaming - - Create `StreamProvider` trait - -### Ballista - -- Make production ready - - Shuffle file cleanup - - Fill functional gaps between DataFusion and Ballista - - Improve task scheduling and data exchange efficiency - - Better error handling - - Task failure - - Executor lost - - Schedule restart - - Improve monitoring and logging - - Auto scaling support -- Support for multi-scheduler deployments. Initially for resiliency and fault tolerance but ultimately to support sharding for scalability and more efficient caching. -- Executor deployment grouping based on resource allocation - -### Extensions ([datafusion-contrib](https://github.com/datafusion-contrib)) - -#### [DataFusion-Python](https://github.com/datafusion-contrib/datafusion-python) - -- Add missing functionality to DataFrame and SessionContext -- Improve documentation - -#### [DataFusion-S3](https://github.com/datafusion-contrib/datafusion-objectstore-s3) - -- Create Python bindings to use with datafusion-python - -#### [DataFusion-Tui](https://github.com/datafusion-contrib/datafusion-tui) - -- Create multiple SQL editors -- Expose more Context and query metadata -- Support new data sources - - BigTable, HDFS, HTTP APIs - -#### [DataFusion-BigTable](https://github.com/datafusion-contrib/datafusion-bigtable) - -- Python binding to use with datafusion-python -- Timestamp range predicate pushdown -- Multi-threaded partition aware execution -- Production ready Rust SDK - -#### [DataFusion-Streams](https://github.com/datafusion-contrib/datafusion-streams) - -- Create experimental implementation of `StreamProvider` trait diff --git a/docs/source/contributor-guide/roadmap.md b/docs/source/contributor-guide/roadmap.md index a6d78d9311aa4..3d9c1ee371fe6 100644 --- a/docs/source/contributor-guide/roadmap.md +++ b/docs/source/contributor-guide/roadmap.md @@ -43,3 +43,84 @@ start a conversation using a github issue or the make review efficient and avoid surprises. [The current list of `EPIC`s can be found here](https://github.com/apache/datafusion/issues?q=is%3Aissue+is%3Aopen+epic). 
+ +# Quarterly Roadmap + +A quarterly roadmap will be published to give the DataFusion community +visibility into the priorities of the projects contributors. This roadmap is not +binding and we would welcome any/all contributions to help keep this list up to +date. + +## 2023 Q4 + +- Improve data output (`COPY`, `INSERT` and DataFrame) output capability [#6569](https://github.com/apache/datafusion/issues/6569) +- Implementation of `ARRAY` types and related functions [#6980](https://github.com/apache/datafusion/issues/6980) +- Write an industrial paper about DataFusion for SIGMOD [#6782](https://github.com/apache/datafusion/issues/6782) + +## 2022 Q2 + +### DataFusion Core + +- IO Improvements + - Reading, registering, and writing more file formats from both DataFrame API and SQL + - Additional options for IO including partitioning and metadata support +- Work Scheduling + - Improve predictability, observability and performance of IO and CPU-bound work + - Develop a more explicit story for managing parallelism during plan execution +- Memory Management + - Add more operators for memory limited execution +- Performance + - Incorporate row-format into operators such as aggregate + - Add row-format benchmarks + - Explore JIT-compiling complex expressions + - Explore LLVM for JIT, with inline Rust functions as the primary goal + - Improve performance of Sort and Merge using Row Format / JIT expressions +- Documentation + - General improvements to DataFusion website + - Publish design documents +- Streaming + - Create `StreamProvider` trait + +### Ballista + +- Make production ready + - Shuffle file cleanup + - Fill functional gaps between DataFusion and Ballista + - Improve task scheduling and data exchange efficiency + - Better error handling + - Task failure + - Executor lost + - Schedule restart + - Improve monitoring and logging + - Auto scaling support +- Support for multi-scheduler deployments. Initially for resiliency and fault tolerance but ultimately to support sharding for scalability and more efficient caching. 
+- Executor deployment grouping based on resource allocation + +### Extensions ([datafusion-contrib](https://github.com/datafusion-contrib)) + +### [DataFusion-Python](https://github.com/datafusion-contrib/datafusion-python) + +- Add missing functionality to DataFrame and SessionContext +- Improve documentation + +### [DataFusion-S3](https://github.com/datafusion-contrib/datafusion-objectstore-s3) + +- Create Python bindings to use with datafusion-python + +### [DataFusion-Tui](https://github.com/datafusion-contrib/datafusion-tui) + +- Create multiple SQL editors +- Expose more Context and query metadata +- Support new data sources + - BigTable, HDFS, HTTP APIs + +### [DataFusion-BigTable](https://github.com/datafusion-contrib/datafusion-bigtable) + +- Python binding to use with datafusion-python +- Timestamp range predicate pushdown +- Multi-threaded partition aware execution +- Production ready Rust SDK + +### [DataFusion-Streams](https://github.com/datafusion-contrib/datafusion-streams) + +- Create experimental implementation of `StreamProvider` trait diff --git a/docs/source/index.rst b/docs/source/index.rst index 8fbff208f5617..ca6905c434f35 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -121,7 +121,6 @@ To get started, see contributor-guide/testing contributor-guide/howtos contributor-guide/roadmap - contributor-guide/quarterly_roadmap contributor-guide/governance contributor-guide/inviting contributor-guide/specification/index From d5367f3ff5ed506e824a04c68120194deb68a908 Mon Sep 17 00:00:00 2001 From: Georgi Krastev Date: Fri, 12 Jul 2024 22:34:35 +0300 Subject: [PATCH 040/357] Avoid calling shutdown after failed write of AsyncWrite (#249) (#250) (#11415) in `serialize_rb_stream_to_object_store` --- .../file_format/write/orchestration.rs | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index a62b5715aeb3b..8bd0dae9f5a48 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -42,15 +42,20 @@ use tokio::task::JoinSet; type WriterType = Box; type SerializerType = Arc; -/// Serializes a single data stream in parallel and writes to an ObjectStore -/// concurrently. Data order is preserved. In the event of an error, -/// the ObjectStore writer is returned to the caller in addition to an error, -/// so that the caller may handle aborting failed writes. +/// Serializes a single data stream in parallel and writes to an ObjectStore concurrently. +/// Data order is preserved. +/// +/// In the event of a non-IO error which does not involve the ObjectStore writer, +/// the writer returned to the caller in addition to the error, +/// so that failed writes may be aborted. +/// +/// In the event of an IO error involving the ObjectStore writer, +/// the writer is dropped to avoid calling further methods on it which might panic. 
pub(crate) async fn serialize_rb_stream_to_object_store( mut data_rx: Receiver, serializer: Arc, mut writer: WriterType, -) -> std::result::Result<(WriterType, u64), (WriterType, DataFusionError)> { +) -> std::result::Result<(WriterType, u64), (Option, DataFusionError)> { let (tx, mut rx) = mpsc::channel::>>(100); let serialize_task = SpawnedTask::spawn(async move { @@ -82,7 +87,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( Ok(_) => (), Err(e) => { return Err(( - writer, + None, DataFusionError::Execution(format!( "Error writing to object store: {e}" )), @@ -93,12 +98,12 @@ pub(crate) async fn serialize_rb_stream_to_object_store( } Ok(Err(e)) => { // Return the writer along with the error - return Err((writer, e)); + return Err((Some(writer), e)); } Err(e) => { // Handle task panic or cancellation return Err(( - writer, + Some(writer), DataFusionError::Execution(format!( "Serialization task panicked or was cancelled: {e}" )), @@ -109,10 +114,10 @@ pub(crate) async fn serialize_rb_stream_to_object_store( match serialize_task.join().await { Ok(Ok(_)) => (), - Ok(Err(e)) => return Err((writer, e)), + Ok(Err(e)) => return Err((Some(writer), e)), Err(_) => { return Err(( - writer, + Some(writer), internal_datafusion_err!("Unknown error writing to object store"), )) } @@ -153,7 +158,7 @@ pub(crate) async fn stateless_serialize_and_write_files( row_count += cnt; } Err((writer, e)) => { - finished_writers.push(writer); + finished_writers.extend(writer); any_errors = true; triggering_error = Some(e); } From 02335ebe2dd36081e22ed2d8ab46287c6d950a5c Mon Sep 17 00:00:00 2001 From: kamille Date: Sat, 13 Jul 2024 03:50:22 +0800 Subject: [PATCH 041/357] Short term way to make `AggregateStatistics` still work when min/max is converted to udaf (#11261) * impl the short term solution. * add todos. 
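The gist of the short-term fix: an aggregate is still treated as `count`/`min`/`max` when it is either the legacy built-in physical expression or a UDAF-backed `AggregateFunctionExpr` whose registered name matches. Below is a minimal, self-contained sketch of that downcast-or-name-check pattern; the stand-in types are illustrative only, and the actual DataFusion types and helpers appear in the diff that follows.

```rust
use std::any::Any;

// Illustrative stand-ins for the real physical aggregate expressions.
trait AggregateExpr: Any {
    fn as_any(&self) -> &dyn Any;
}

struct Min; // legacy built-in MIN physical expression

struct AggregateFunctionExpr {
    name: &'static str, // name the UDAF is registered under, e.g. "min"
}

impl AggregateExpr for Min {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

impl AggregateExpr for AggregateFunctionExpr {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

// Accept either the built-in expression or a UDAF registered as "min".
fn is_min(agg: &dyn AggregateExpr) -> bool {
    if agg.as_any().is::<Min>() {
        return true;
    }
    if let Some(udaf) = agg.as_any().downcast_ref::<AggregateFunctionExpr>() {
        return udaf.name == "min";
    }
    false
}

fn main() {
    assert!(is_min(&Min));
    assert!(is_min(&AggregateFunctionExpr { name: "min" }));
    assert!(!is_min(&AggregateFunctionExpr { name: "max" }));
}
```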
--- .../aggregate_statistics.rs | 136 +++++++++++------- 1 file changed, 85 insertions(+), 51 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 7e9aec9e5e4c4..66067d8cb5c42 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -140,31 +140,29 @@ fn take_optimizable_column_and_table_count( stats: &Statistics, ) -> Option<(ScalarValue, String)> { let col_stats = &stats.column_statistics; - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { - if let Precision::Exact(num_rows) = stats.num_rows { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - let current_val = &col_stats[col_expr.index()].null_count; - if let &Precision::Exact(val) = current_val { - return Some(( - ScalarValue::Int64(Some((num_rows - val) as i64)), - agg_expr.name().to_string(), - )); - } - } else if let Some(lit_expr) = - exprs[0].as_any().downcast_ref::() - { - if lit_expr.value() == &COUNT_STAR_EXPANSION { - return Some(( - ScalarValue::Int64(Some(num_rows as i64)), - agg_expr.name().to_string(), - )); - } + if is_non_distinct_count(agg_expr) { + if let Precision::Exact(num_rows) = stats.num_rows { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() + { + let current_val = &col_stats[col_expr.index()].null_count; + if let &Precision::Exact(val) = current_val { + return Some(( + ScalarValue::Int64(Some((num_rows - val) as i64)), + agg_expr.name().to_string(), + )); + } + } else if let Some(lit_expr) = + exprs[0].as_any().downcast_ref::() + { + if lit_expr.value() == &COUNT_STAR_EXPANSION { + return Some(( + ScalarValue::Int64(Some(num_rows as i64)), + agg_expr.name().to_string(), + )); } } } @@ -182,26 +180,22 @@ fn take_optimizable_min( match *num_rows { 0 => { // MIN/MAX with 0 rows is always null - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::() - { + if is_min(agg_expr) { if let Ok(min_data_type) = - ScalarValue::try_from(casted_expr.field().unwrap().data_type()) + ScalarValue::try_from(agg_expr.field().unwrap().data_type()) { - return Some((min_data_type, casted_expr.name().to_string())); + return Some((min_data_type, agg_expr.name().to_string())); } } } value if value > 0 => { let col_stats = &stats.column_statistics; - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::() - { - if casted_expr.expressions().len() == 1 { + if is_min(agg_expr) { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::() + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() { if let Precision::Exact(val) = &col_stats[col_expr.index()].min_value @@ -209,7 +203,7 @@ fn take_optimizable_min( if !val.is_null() { return Some(( val.clone(), - casted_expr.name().to_string(), + agg_expr.name().to_string(), )); } } @@ -232,26 +226,22 @@ fn take_optimizable_max( match *num_rows { 0 => { // MIN/MAX with 0 rows is always null - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::() - { + if is_max(agg_expr) { if let Ok(max_data_type) = - 
ScalarValue::try_from(casted_expr.field().unwrap().data_type()) + ScalarValue::try_from(agg_expr.field().unwrap().data_type()) { - return Some((max_data_type, casted_expr.name().to_string())); + return Some((max_data_type, agg_expr.name().to_string())); } } } value if value > 0 => { let col_stats = &stats.column_statistics; - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::() - { - if casted_expr.expressions().len() == 1 { + if is_max(agg_expr) { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::() + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() { if let Precision::Exact(val) = &col_stats[col_expr.index()].max_value @@ -259,7 +249,7 @@ fn take_optimizable_max( if !val.is_null() { return Some(( val.clone(), - casted_expr.name().to_string(), + agg_expr.name().to_string(), )); } } @@ -273,6 +263,50 @@ fn take_optimizable_max( None } +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { + return true; + } + } + + false +} + +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_min(agg_expr: &dyn AggregateExpr) -> bool { + if agg_expr.as_any().is::() { + return true; + } + + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name() == "min" { + return true; + } + } + + false +} + +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_max(agg_expr: &dyn AggregateExpr) -> bool { + if agg_expr.as_any().is::() { + return true; + } + + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name() == "max" { + return true; + } + } + + false +} + #[cfg(test)] pub(crate) mod tests { use super::*; From bd25e26747a271752b7f46aa0970022525eff05b Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Fri, 12 Jul 2024 12:51:01 -0700 Subject: [PATCH 042/357] Implement TPCH substrait integration test, support tpch_13, tpch_14, tpch_16 (#11405) optimize code --- .../tests/cases/consumer_integration.rs | 86 +- .../tpch_substrait_plans/query_13.json | 624 +++++++++ .../tpch_substrait_plans/query_14.json | 924 +++++++++++++ .../tpch_substrait_plans/query_16.json | 1175 +++++++++++++++++ 4 files changed, 2808 insertions(+), 1 deletion(-) create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_13.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_14.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_16.json diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 10c1319b903b5..c8130220ef4ae 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -40,7 +40,6 @@ mod tests { } Ok(ctx) } - #[tokio::test] async fn tpch_test_1() -> Result<()> { let ctx = create_context(vec![( @@ -314,4 +313,89 @@ mod tests { \n TableScan: FILENAME_PLACEHOLDER_2 projection=[n_nationkey, n_name, n_regionkey, n_comment]"); Ok(()) } + + // missing query 12 + 
#[tokio::test] + async fn tpch_test_13() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_13.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: count(FILENAME_PLACEHOLDER_1.o_orderkey) AS C_COUNT, count(Int64(1)) AS CUSTDIST\ + \n Sort: count(Int64(1)) DESC NULLS FIRST, count(FILENAME_PLACEHOLDER_1.o_orderkey) DESC NULLS FIRST\ + \n Projection: count(FILENAME_PLACEHOLDER_1.o_orderkey), count(Int64(1))\ + \n Aggregate: groupBy=[[count(FILENAME_PLACEHOLDER_1.o_orderkey)]], aggr=[[count(Int64(1))]]\ + \n Projection: count(FILENAME_PLACEHOLDER_1.o_orderkey)\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.c_custkey]], aggr=[[count(FILENAME_PLACEHOLDER_1.o_orderkey)]]\ + \n Projection: FILENAME_PLACEHOLDER_0.c_custkey, FILENAME_PLACEHOLDER_1.o_orderkey\ + \n Left Join: FILENAME_PLACEHOLDER_0.c_custkey = FILENAME_PLACEHOLDER_1.o_custkey Filter: NOT FILENAME_PLACEHOLDER_1.o_comment LIKE CAST(Utf8(\"%special%requests%\") AS Utf8)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment]"); + Ok(()) + } + + #[tokio::test] + async fn tpch_test_14() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/part.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_14.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ + \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ + \n Projection: CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount) ELSE Decimal128(Some(0),19,0) END, FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ + \n Filter: FILENAME_PLACEHOLDER_0.l_partkey = FILENAME_PLACEHOLDER_1.p_partkey AND FILENAME_PLACEHOLDER_0.l_shipdate >= Date32(\"1995-09-01\") AND FILENAME_PLACEHOLDER_0.l_shipdate < CAST(Utf8(\"1995-10-01\") AS 
Date32)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment]"); + Ok(()) + } + // query 15 is missing + #[tokio::test] + async fn tpch_test_16() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/part.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/supplier.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_16.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_1.p_brand AS P_BRAND, FILENAME_PLACEHOLDER_1.p_type AS P_TYPE, FILENAME_PLACEHOLDER_1.p_size AS P_SIZE, count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey) AS SUPPLIER_CNT\ + \n Sort: count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey) DESC NULLS FIRST, FILENAME_PLACEHOLDER_1.p_brand ASC NULLS LAST, FILENAME_PLACEHOLDER_1.p_type ASC NULLS LAST, FILENAME_PLACEHOLDER_1.p_size ASC NULLS LAST\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_1.p_brand, FILENAME_PLACEHOLDER_1.p_type, FILENAME_PLACEHOLDER_1.p_size]], aggr=[[count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey)]]\ + \n Projection: FILENAME_PLACEHOLDER_1.p_brand, FILENAME_PLACEHOLDER_1.p_type, FILENAME_PLACEHOLDER_1.p_size, FILENAME_PLACEHOLDER_0.ps_suppkey\ + \n Filter: FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.ps_partkey AND FILENAME_PLACEHOLDER_1.p_brand != CAST(Utf8(\"Brand#45\") AS Utf8) AND NOT FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"MEDIUM POLISHED%\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_size = Int32(49) OR FILENAME_PLACEHOLDER_1.p_size = Int32(14) OR FILENAME_PLACEHOLDER_1.p_size = Int32(23) OR FILENAME_PLACEHOLDER_1.p_size = Int32(45) OR FILENAME_PLACEHOLDER_1.p_size = Int32(19) OR FILENAME_PLACEHOLDER_1.p_size = Int32(3) OR FILENAME_PLACEHOLDER_1.p_size = Int32(36) OR FILENAME_PLACEHOLDER_1.p_size = Int32(9)) AND NOT CAST(FILENAME_PLACEHOLDER_0.ps_suppkey IN () AS Boolean)\ + \n Subquery:\ + \n Projection: FILENAME_PLACEHOLDER_2.s_suppkey\ + \n Filter: FILENAME_PLACEHOLDER_2.s_comment LIKE CAST(Utf8(\"%Customer%Complaints%\") AS Utf8)\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment]"); + Ok(()) + } } diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_13.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_13.json new file mode 100644 index 0000000000000..c88e61e78304e --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_13.json @@ -0,0 +1,624 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 4, + "uri": 
"/functions_aggregate_generic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 2, + "name": "not:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "like:vchar_vchar" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "count:opt_any" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "count:opt" + } + } + ], + "relations": [ + { + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2, + 3 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 17, + 18 + ] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "C_CUSTKEY", + "C_NAME", + "C_ADDRESS", + "C_NATIONKEY", + "C_PHONE", + "C_ACCTBAL", + "C_MKTSEGMENT", + "C_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "O_ORDERKEY", + "O_CUSTKEY", + "O_ORDERSTATUS", + "O_TOTALPRICE", + "O_ORDERDATE", + "O_ORDERPRIORITY", + "O_CLERK", + "O_SHIPPRIORITY", + "O_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "%special%requests%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + ] + } + }, + "type": "JOIN_TYPE_LEFT" + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 4, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + ] 
+ } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + } + ] + } + }, + "names": [ + "C_COUNT", + "CUSTDIST" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_14.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_14.json new file mode 100644 index 0000000000000..380b71df8aacc --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_14.json @@ -0,0 +1,924 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 5, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gte:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "like:vchar_vchar" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 5, + "name": "multiply:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 6, + "name": "subtract:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "sum:opt_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 8, + "name": "divide:opt_decimal_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 25, + 26 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + 
"common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "P_PARTKEY", + "P_NAME", + "P_MFGR", + "P_BRAND", + "P_TYPE", + "P_SIZE", + "P_CONTAINER", + "P_RETAILPRICE", + "P_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 23, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "date": 9374, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1995-10-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "ifThen": { + "ifs": [ + { + "if": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 20 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "PROMO%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + }, + "then": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + } + ], + "else": { + "literal": { + "decimal": { + "value": "AAAAAAAAAAAAAAAAAAAAAA==", + "precision": 19, + "scale": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + } + }, + { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [] + } + ], + "measures": [ + { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + } + ] + } + }, + { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 8, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "literal": { + "decimal": { + "value": "ECcAAAAAAAAAAAAAAAAAAA==", + "precision": 5, + 
"scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "names": [ + "PROMO_REVENUE" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_16.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_16.json new file mode 100644 index 0000000000000..f988aa7a76a26 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_16.json @@ -0,0 +1,1175 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "not_equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 3, + "name": "not:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "like:vchar_vchar" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 5, + "name": "or:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "count:opt_any" + } + } + ], + "relations": [ + { + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 14, + 15, + 16, + 17 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "PS_PARTKEY", + "PS_SUPPKEY", + "PS_AVAILQTY", + "PS_SUPPLYCOST", + "PS_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "P_PARTKEY", + "P_NAME", + "P_MFGR", + "P_BRAND", + "P_TYPE", + "P_SIZE", + "P_CONTAINER", + "P_RETAILPRICE", + "P_COMMENT" + ], + "struct": { + "types": [ + { + 
"i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#45", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "MEDIUM POLISHED%", + "nullable": false, + 
"typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 49, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 14, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 23, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 45, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 19, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 3, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 36, + "nullable": 
false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 9, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + ], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 7 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "S_SUPPKEY", + "S_NAME", + "S_ADDRESS", + "S_NATIONKEY", + "S_PHONE", + "S_ACCTBAL", + "S_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "%Customer%Complaints%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } 
+ }, + { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_DISTINCT", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + } + ] + } + }, + "names": [ + "P_BRAND", + "P_TYPE", + "P_SIZE", + "SUPPLIER_CNT" + ] + } + } + ], + "expectedTypeUrls": [] +} From 4d04a6ebbb0458495d2282df34e8b22001f3971d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 15:51:44 -0400 Subject: [PATCH 043/357] Minor: fix labeler rules (#11428) --- .github/workflows/dev_pr/labeler.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 34a37948785b5..308abd1688a6d 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -17,11 +17,11 @@ development-process: - changed-files: - - any-glob-to-any-file: ['dev/**.*', '.github/**.*', 'ci/**.*', '.asf.yaml'] + - any-glob-to-any-file: ['dev/**/*', '.github/**/*', 'ci/**/*', '.asf.yaml'] documentation: - changed-files: - - any-glob-to-any-file: ['docs/**.*', 'README.md', './**/README.md', 'DEVELOPERS.md', 'datafusion/docs/**.*'] + - any-glob-to-any-file: ['docs/**/*', 'README.md', './**/README.md', 'DEVELOPERS.md', 'datafusion/docs/**/*'] sql: - changed-files: From 8f8df07c80aa66bb94d57c9619be93f9c3be92a9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Jul 2024 23:14:17 -0400 Subject: [PATCH 044/357] Minor: change internal error to not supported error for nested field access (#11446) --- datafusion/sql/src/expr/identifier.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/expr/identifier.rs 
b/datafusion/sql/src/expr/identifier.rs index d297b2e4df5b3..39736b1fbba59 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -18,8 +18,8 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::Field; use datafusion_common::{ - internal_err, plan_datafusion_err, Column, DFSchema, DataFusionError, Result, - ScalarValue, TableReference, + internal_err, not_impl_err, plan_datafusion_err, Column, DFSchema, DataFusionError, + Result, ScalarValue, TableReference, }; use datafusion_expr::{expr::ScalarFunction, lit, Case, Expr}; use sqlparser::ast::{Expr as SQLExpr, Ident}; @@ -118,7 +118,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Though ideally once that support is in place, this code should work with it // TODO: remove when can support multiple nested identifiers if ids.len() > 5 { - return internal_err!("Unsupported compound identifier: {ids:?}"); + return not_impl_err!("Compound identifier: {ids:?}"); } let search_result = search_dfschema(&ids, schema); @@ -127,7 +127,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some((field, qualifier, nested_names)) if !nested_names.is_empty() => { // TODO: remove when can support multiple nested identifiers if nested_names.len() > 1 { - return internal_err!( + return not_impl_err!( "Nested identifiers not yet supported for column {}", Column::from((qualifier, field)).quoted_flat_name() ); @@ -154,7 +154,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // return default where use all identifiers to not have a nested field // this len check is because at 5 identifiers will have to have a nested field if ids.len() == 5 { - internal_err!("Unsupported compound identifier: {ids:?}") + not_impl_err!("compound identifier: {ids:?}") } else { // check the outer_query_schema and try to find a match if let Some(outer) = planner_context.outer_query_schema() { @@ -165,7 +165,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if !nested_names.is_empty() => { // TODO: remove when can support nested identifiers for OuterReferenceColumn - internal_err!( + not_impl_err!( "Nested identifiers are not yet supported for OuterReferenceColumn {}", Column::from((qualifier, field)).quoted_flat_name() ) From 9e4a4a1599b9def33f27a6f82dd32045038de296 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 13 Jul 2024 05:33:51 -0400 Subject: [PATCH 045/357] Minor: change Datafusion --> DataFusion in docs (#11439) * Minor: change Datafusion --> DataFusion in docs * update expected --- datafusion-examples/README.md | 4 ++-- datafusion-examples/examples/expr_api.rs | 2 +- datafusion/common/src/config.rs | 2 +- datafusion/core/src/dataframe/mod.rs | 2 +- datafusion/expr/src/signature.rs | 2 +- datafusion/optimizer/src/unwrap_cast_in_comparison.rs | 2 +- datafusion/physical-expr/src/intervals/cp_solver.rs | 2 +- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- datafusion/sql/src/parser.rs | 2 +- datafusion/sqllogictest/README.md | 2 +- datafusion/sqllogictest/test_files/information_schema.slt | 2 +- datafusion/sqllogictest/test_files/window.slt | 6 +++--- docs/source/contributor-guide/inviting.md | 2 +- docs/source/user-guide/configs.md | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 2696f74775cf3..da01f60b527d9 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -71,8 +71,8 @@ cargo run --example dataframe - [`parquet_index.rs`](examples/parquet_index.rs): Create an 
secondary index over several parquet files and use it to speed up queries - [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): Build and run a query plan from a SQL statement against multiple local Parquet files - [`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution -- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into Datafusion `Expr`. -- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from Datafusion `Expr` and `LogicalPlan` +- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`. +- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan` - [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics - [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 - [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 43729a913e5d8..a5cf7011f8113 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -83,7 +83,7 @@ async fn main() -> Result<()> { Ok(()) } -/// Datafusion's `expr_fn` API makes it easy to create [`Expr`]s for the +/// DataFusion's `expr_fn` API makes it easy to create [`Expr`]s for the /// full range of expression types such as aggregates and window functions. fn expr_fn_demo() -> Result<()> { // Let's say you want to call the "first_value" aggregate function diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1d2a9589adfc6..880f0119ce0da 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -309,7 +309,7 @@ config_namespace! { /// Currently experimental pub split_file_groups_by_statistics: bool, default = false - /// Should Datafusion keep the columns used for partition_by in the output RecordBatches + /// Should DataFusion keep the columns used for partition_by in the output RecordBatches pub keep_partition_by_columns: bool, default = false } } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index d0f2852a6e53a..05a08a6378930 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1472,7 +1472,7 @@ impl DataFrame { /// /// The method supports case sensitive rename with wrapping column name into one of following symbols ( " or ' or ` ) /// - /// Alternatively setting Datafusion param `datafusion.sql_parser.enable_ident_normalization` to `false` will enable + /// Alternatively setting DataFusion param `datafusion.sql_parser.enable_ident_normalization` to `false` will enable /// case sensitive rename without need to wrap column name into special symbols /// /// # Example diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 33f643eb2dc2a..fba793dd229d3 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -93,7 +93,7 @@ pub enum TypeSignature { Variadic(Vec), /// The acceptable signature and coercions rules to coerce arguments to this /// signature are special for this function. If this signature is specified, - /// Datafusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types. 
+ /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types. /// /// [`ScalarUDFImpl::coerce_types`]: crate::udf::ScalarUDFImpl::coerce_types UserDefined, diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 3447082525597..9941da9dd65e0 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -1080,7 +1080,7 @@ mod tests { ), }; - // Datafusion ignores timezones for comparisons of ScalarValue + // DataFusion ignores timezones for comparisons of ScalarValue // so double check it here assert_eq!(lit_tz_none, lit_tz_utc); diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index fc4950ae4e7ca..f05ac3624b8e2 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -176,7 +176,7 @@ impl ExprIntervalGraphNode { &self.interval } - /// This function creates a DAEG node from Datafusion's [`ExprTreeNode`] + /// This function creates a DAEG node from DataFusion's [`ExprTreeNode`] /// object. Literals are created with definite, singleton intervals while /// any other expression starts with an indefinite interval ([-∞, ∞]). pub fn make_node(node: &ExprTreeNode, schema: &Schema) -> Result { diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 8caf10acf09b8..8bf808af3b5b8 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -324,7 +324,7 @@ impl AggregateExec { /// Create a new hash aggregate execution plan with the given schema. /// This constructor isn't part of the public API, it is used internally - /// by Datafusion to enforce schema consistency during when re-creating + /// by DataFusion to enforce schema consistency during when re-creating /// `AggregateExec`s inside optimization rules. Schema field names of an /// `AggregateExec` depends on the names of aggregate expressions. Since /// a rule may re-write aggregate expressions (e.g. reverse them) during diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 5da7f71765096..8147092c34aba 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -253,7 +253,7 @@ fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { Ok(()) } -/// Datafusion SQL Parser based on [`sqlparser`] +/// DataFusion SQL Parser based on [`sqlparser`] /// /// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`]. /// diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index 930df47967762..c7f04c0d762c1 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -225,7 +225,7 @@ query ``` -- `test_name`: Uniquely identify the test name (Datafusion only) +- `test_name`: Uniquely identify the test name (DataFusion only) - `type_string`: A short string that specifies the number of result columns and the expected datatype of each result column. There is one character in the for each result column. 
The characters codes are: - 'B' - **B**oolean, diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index acd465a0c021f..95bea1223a9ce 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -257,7 +257,7 @@ datafusion.execution.batch_size 8192 Default batch size while creating new batch datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting datafusion.execution.collect_statistics false Should DataFusion collect statistics after listing files datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs -datafusion.execution.keep_partition_by_columns false Should Datafusion keep the columns used for partition_by in the output RecordBatches +datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches datafusion.execution.listing_table_ignore_subdirectory true Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`). datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 7f2e766aab915..a865a7ccbd8fb 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2236,7 +2236,7 @@ SELECT SUM(c12) OVER(ORDER BY c1, c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) 7.728066219895 NULL # test_c9_rn_ordering_alias -# These tests check whether Datafusion is aware of the ordering generated by the ROW_NUMBER() window function. +# These tests check whether DataFusion is aware of the ordering generated by the ROW_NUMBER() window function. # Physical plan shouldn't have a SortExec after the BoundedWindowAggExec since the table after BoundedWindowAggExec is already ordered by rn1 ASC and c9 DESC. query TT EXPLAIN SELECT c9, rn1 FROM (SELECT c9, @@ -2275,7 +2275,7 @@ SELECT c9, rn1 FROM (SELECT c9, 145294611 5 # test_c9_rn_ordering_alias_opposite_direction -# These tests check whether Datafusion is aware of the ordering generated by the ROW_NUMBER() window function. +# These tests check whether DataFusion is aware of the ordering generated by the ROW_NUMBER() window function. # Physical plan shouldn't have a SortExec after the BoundedWindowAggExec since the table after BoundedWindowAggExec is already ordered by rn1 ASC and c9 DESC. 
query TT EXPLAIN SELECT c9, rn1 FROM (SELECT c9, @@ -2314,7 +2314,7 @@ SELECT c9, rn1 FROM (SELECT c9, 4076864659 5 # test_c9_rn_ordering_alias_opposite_direction2 -# These tests check whether Datafusion is aware of the ordering generated by the ROW_NUMBER() window function. +# These tests check whether DataFusion is aware of the ordering generated by the ROW_NUMBER() window function. # Physical plan _should_ have a SortExec after BoundedWindowAggExec since the table after BoundedWindowAggExec is ordered by rn1 ASC and c9 DESC, which is conflicting with the requirement rn1 DESC. query TT EXPLAIN SELECT c9, rn1 FROM (SELECT c9, diff --git a/docs/source/contributor-guide/inviting.md b/docs/source/contributor-guide/inviting.md index 967f417e6e9aa..4066dd9699eeb 100644 --- a/docs/source/contributor-guide/inviting.md +++ b/docs/source/contributor-guide/inviting.md @@ -59,7 +59,7 @@ the person. Here is an example: To: private@datafusion.apache.org Subject: [DISCUSS] $PERSONS_NAME for Committer -$PERSONS_NAME has been an active contributor to the Datafusion community for the +$PERSONS_NAME has been an active contributor to the DataFusion community for the last 6 months[1][2], helping others, answering questions, and improving the project's code. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 579088f991ef2..5130b0a56d0e9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -86,7 +86,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.listing_table_ignore_subdirectory | true | Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`). | | datafusion.execution.enable_recursive_ctes | true | Should DataFusion support recursive CTEs | | datafusion.execution.split_file_groups_by_statistics | false | Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental | -| datafusion.execution.keep_partition_by_columns | false | Should Datafusion keep the columns used for partition_by in the output RecordBatches | +| datafusion.execution.keep_partition_by_columns | false | Should DataFusion keep the columns used for partition_by in the output RecordBatches | | datafusion.optimizer.enable_distinct_aggregation_soft_limit | true | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. 
| | datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores | | datafusion.optimizer.enable_topk_aggregation | true | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible | From 08fa444aaa8513a60ede5c57d92f29e6156b91a8 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Sat, 13 Jul 2024 17:34:45 +0800 Subject: [PATCH 046/357] fix: make sure JOIN ON expression is boolean type (#11423) * fix: make sure JOIN ON expression is boolean type * Applied to DataFrame * Update datafusion/optimizer/src/analyzer/type_coercion.rs Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb --- datafusion/core/src/dataframe/mod.rs | 31 +++++++++++++++++-- .../optimizer/src/analyzer/type_coercion.rs | 17 +++++++++- datafusion/sqllogictest/test_files/join.slt | 12 ++++++- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 05a08a6378930..c55b7c752765d 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -896,9 +896,8 @@ impl DataFrame { join_type: JoinType, on_exprs: impl IntoIterator, ) -> Result { - let expr = on_exprs.into_iter().reduce(Expr::and); let plan = LogicalPlanBuilder::from(self.plan) - .join_on(right.plan, join_type, expr)? + .join_on(right.plan, join_type, on_exprs)? .build()?; Ok(DataFrame { session_state: self.session_state, @@ -1694,7 +1693,7 @@ mod tests { use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name}; use arrow::array::{self, Int32Array}; - use datafusion_common::{Constraint, Constraints}; + use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::{ array_agg, cast, create_udf, expr, lit, BuiltInWindowFunction, @@ -2555,6 +2554,32 @@ mod tests { Ok(()) } + #[tokio::test] + async fn join_on_filter_datatype() -> Result<()> { + let left = test_table_with_name("a").await?.select_columns(&["c1"])?; + let right = test_table_with_name("b").await?.select_columns(&["c1"])?; + + // JOIN ON untyped NULL + let join = left.clone().join_on( + right.clone(), + JoinType::Inner, + Some(Expr::Literal(ScalarValue::Null)), + )?; + let expected_plan = "CrossJoin:\ + \n TableScan: a projection=[c1], full_filters=[Boolean(NULL)]\ + \n TableScan: b projection=[c1]"; + assert_eq!(expected_plan, format!("{:?}", join.into_optimized_plan()?)); + + // JOIN ON expression must be boolean type + let join = left.join_on(right, JoinType::Inner, Some(lit("TRUE")))?; + let expected = join.into_optimized_plan().unwrap_err(); + assert_eq!( + expected.strip_backtrace(), + "type_coercion\ncaused by\nError during planning: Join condition must be boolean type, but got Utf8" + ); + Ok(()) + } + #[tokio::test] async fn join_ambiguous_filter() -> Result<()> { let left = test_table_with_name("a") diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 3cab474df84e0..80a8c864e4311 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -127,7 +127,7 @@ impl<'a> TypeCoercionRewriter<'a> { Self { schema } } - /// Coerce join equality expressions + /// Coerce join equality expressions and join filter /// /// Joins must be treated specially as their equality expressions 
are stored /// as a parallel list of left and right expressions, rather than a single @@ -151,9 +151,24 @@ impl<'a> TypeCoercionRewriter<'a> { }) .collect::>>()?; + // Join filter must be boolean + join.filter = join + .filter + .map(|expr| self.coerce_join_filter(expr)) + .transpose()?; + Ok(LogicalPlan::Join(join)) } + fn coerce_join_filter(&self, expr: Expr) -> Result { + let expr_type = expr.get_type(self.schema)?; + match expr_type { + DataType::Boolean => Ok(expr), + DataType::Null => expr.cast_to(&DataType::Boolean, self.schema), + other => plan_err!("Join condition must be boolean type, but got {other:?}"), + } + } + fn coerce_binary_op( &self, left: Expr, diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 12cb8b3985c76..efebba1779cf7 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -988,7 +988,6 @@ statement ok DROP TABLE department -# Test issue: https://github.com/apache/datafusion/issues/11269 statement ok CREATE TABLE t1 (v0 BIGINT) AS VALUES (-503661263); @@ -998,11 +997,22 @@ CREATE TABLE t2 (v0 DOUBLE) AS VALUES (-1.663563947387); statement ok CREATE TABLE t3 (v0 DOUBLE) AS VALUES (0.05112015193508901); +# Test issue: https://github.com/apache/datafusion/issues/11269 query RR SELECT t3.v0, t2.v0 FROM t1,t2,t3 WHERE t3.v0 >= t1.v0; ---- 0.051120151935 -1.663563947387 +# Test issue: https://github.com/apache/datafusion/issues/11414 +query IRR +SELECT * FROM t1 INNER JOIN t2 ON NULL RIGHT JOIN t3 ON TRUE; +---- +NULL NULL 0.051120151935 + +# ON expression must be boolean type +query error DataFusion error: type_coercion\ncaused by\nError during planning: Join condition must be boolean type, but got Utf8 +SELECT * FROM t1 INNER JOIN t2 ON 'TRUE' + statement ok DROP TABLE t1; From f5d88d1790eea85910ae5590a353ae17318f8401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Sun, 14 Jul 2024 05:44:32 +0800 Subject: [PATCH 047/357] Support serialization/deserialization for custom physical exprs in proto (#11387) * Add PhysicalExtensionExprNode * regen proto * Add ser/de extension expr logic * Add test and fix clippy lint --- datafusion/proto/proto/datafusion.proto | 7 + datafusion/proto/src/generated/pbjson.rs | 124 +++++++++++++++ datafusion/proto/src/generated/prost.rs | 12 +- .../proto/src/physical_plan/from_proto.rs | 8 + datafusion/proto/src/physical_plan/mod.rs | 16 ++ .../proto/src/physical_plan/to_proto.rs | 19 ++- .../tests/cases/roundtrip_physical_plan.rs | 147 +++++++++++++++++- 7 files changed, 330 insertions(+), 3 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 345765b08be3c..9ef884531e320 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -836,6 +836,8 @@ message PhysicalExprNode { // was PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17; PhysicalLikeExprNode like_expr = 18; + + PhysicalExtensionExprNode extension = 19; } } @@ -942,6 +944,11 @@ message PhysicalNegativeNode { PhysicalExprNode expr = 1; } +message PhysicalExtensionExprNode { + bytes expr = 1; + repeated PhysicalExprNode inputs = 2; +} + message FilterExecNode { PhysicalPlanNode input = 1; PhysicalExprNode expr = 2; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 905f0d9849556..fa989480fad90 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ 
b/datafusion/proto/src/generated/pbjson.rs @@ -13543,6 +13543,9 @@ impl serde::Serialize for PhysicalExprNode { physical_expr_node::ExprType::LikeExpr(v) => { struct_ser.serialize_field("likeExpr", v)?; } + physical_expr_node::ExprType::Extension(v) => { + struct_ser.serialize_field("extension", v)?; + } } } struct_ser.end() @@ -13582,6 +13585,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "scalarUdf", "like_expr", "likeExpr", + "extension", ]; #[allow(clippy::enum_variant_names)] @@ -13602,6 +13606,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { WindowExpr, ScalarUdf, LikeExpr, + Extension, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -13639,6 +13644,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "windowExpr" | "window_expr" => Ok(GeneratedField::WindowExpr), "scalarUdf" | "scalar_udf" => Ok(GeneratedField::ScalarUdf), "likeExpr" | "like_expr" => Ok(GeneratedField::LikeExpr), + "extension" => Ok(GeneratedField::Extension), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -13771,6 +13777,13 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { return Err(serde::de::Error::duplicate_field("likeExpr")); } expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::LikeExpr) +; + } + GeneratedField::Extension => { + if expr_type__.is_some() { + return Err(serde::de::Error::duplicate_field("extension")); + } + expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::Extension) ; } } @@ -13783,6 +13796,117 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { deserializer.deserialize_struct("datafusion.PhysicalExprNode", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for PhysicalExtensionExprNode { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if !self.expr.is_empty() { + len += 1; + } + if !self.inputs.is_empty() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.PhysicalExtensionExprNode", len)?; + if !self.expr.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("expr", pbjson::private::base64::encode(&self.expr).as_str())?; + } + if !self.inputs.is_empty() { + struct_ser.serialize_field("inputs", &self.inputs)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for PhysicalExtensionExprNode { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "expr", + "inputs", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Expr, + Inputs, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "expr" => Ok(GeneratedField::Expr), + "inputs" => Ok(GeneratedField::Inputs), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + 
deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = PhysicalExtensionExprNode; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.PhysicalExtensionExprNode") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut expr__ = None; + let mut inputs__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::Expr => { + if expr__.is_some() { + return Err(serde::de::Error::duplicate_field("expr")); + } + expr__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; + } + GeneratedField::Inputs => { + if inputs__.is_some() { + return Err(serde::de::Error::duplicate_field("inputs")); + } + inputs__ = Some(map_.next_value()?); + } + } + } + Ok(PhysicalExtensionExprNode { + expr: expr__.unwrap_or_default(), + inputs: inputs__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.PhysicalExtensionExprNode", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for PhysicalExtensionNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b16d26ee6e1e0..8407e545fe650 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1218,7 +1218,7 @@ pub struct PhysicalExtensionNode { pub struct PhysicalExprNode { #[prost( oneof = "physical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19" )] pub expr_type: ::core::option::Option, } @@ -1266,6 +1266,8 @@ pub mod physical_expr_node { ScalarUdf(super::PhysicalScalarUdfNode), #[prost(message, tag = "18")] LikeExpr(::prost::alloc::boxed::Box), + #[prost(message, tag = "19")] + Extension(super::PhysicalExtensionExprNode), } } #[allow(clippy::derive_partial_eq_without_eq)] @@ -1456,6 +1458,14 @@ pub struct PhysicalNegativeNode { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct PhysicalExtensionExprNode { + #[prost(bytes = "vec", tag = "1")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "2")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct FilterExecNode { #[prost(message, optional, boxed, tag = "1")] pub input: ::core::option::Option<::prost::alloc::boxed::Box>, diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index e94bb3b8efcb4..52fbd5cbdcf64 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -394,6 +394,14 @@ pub fn parse_physical_expr( codec, )?, )), + ExprType::Extension(extension) => { + let inputs: Vec> = extension + .inputs + .iter() + .map(|e| parse_physical_expr(e, registry, input_schema, codec)) + .collect::>()?; + (codec.try_decode_expr(extension.expr.as_slice(), &inputs)?) 
as _ + } }; Ok(pexpr) diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 56e702704798f..e5429945e97ef 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -2018,6 +2018,22 @@ pub trait PhysicalExtensionCodec: Debug + Send + Sync { fn try_encode_udf(&self, _node: &ScalarUDF, _buf: &mut Vec) -> Result<()> { Ok(()) } + + fn try_decode_expr( + &self, + _buf: &[u8], + _inputs: &[Arc], + ) -> Result> { + not_impl_err!("PhysicalExtensionCodec is not provided") + } + + fn try_encode_expr( + &self, + _node: Arc, + _buf: &mut Vec, + ) -> Result<()> { + not_impl_err!("PhysicalExtensionCodec is not provided") + } } #[derive(Debug)] diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 5e982ad2afde8..9c95acc1dcf47 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -495,7 +495,24 @@ pub fn serialize_physical_expr( ))), }) } else { - internal_err!("physical_plan::to_proto() unsupported expression {value:?}") + let mut buf: Vec = vec![]; + match codec.try_encode_expr(Arc::clone(&value), &mut buf) { + Ok(_) => { + let inputs: Vec = value + .children() + .into_iter() + .map(|e| serialize_physical_expr(Arc::clone(e), codec)) + .collect::>()?; + Ok(protobuf::PhysicalExprNode { + expr_type: Some(protobuf::physical_expr_node::ExprType::Extension( + protobuf::PhysicalExtensionExprNode { expr: buf, inputs }, + )), + }) + } + Err(e) => internal_err!( + "Unsupported physical expr and extension codec failed with [{e}]. Expr: {value:?}" + ), + } } } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index d8d85ace1a29e..2fcc65008fd8f 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. 
+use arrow::array::RecordBatch; use std::any::Any; +use std::fmt::Display; +use std::hash::Hasher; use std::ops::Deref; use std::sync::Arc; use std::vec; @@ -38,6 +41,7 @@ use datafusion::datasource::physical_plan::{ }; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; +use datafusion::physical_expr::aggregate::utils::down_cast_any_ref; use datafusion::physical_expr::expressions::Max; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; @@ -75,7 +79,7 @@ use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; -use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, Result}; use datafusion_expr::{ Accumulator, AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF, WindowFrame, WindowFrameBound, @@ -658,6 +662,147 @@ async fn roundtrip_parquet_exec_with_table_partition_cols() -> Result<()> { roundtrip_test(ParquetExec::builder(scan_config).build_arc()) } +#[test] +fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { + let scan_config = FileScanConfig { + object_store_url: ObjectStoreUrl::local_filesystem(), + file_schema: Arc::new(Schema::new(vec![Field::new( + "col", + DataType::Utf8, + false, + )])), + file_groups: vec![vec![PartitionedFile::new( + "/path/to/file.parquet".to_string(), + 1024, + )]], + statistics: Statistics { + num_rows: Precision::Inexact(100), + total_byte_size: Precision::Inexact(1024), + column_statistics: Statistics::unknown_column(&Arc::new(Schema::new(vec![ + Field::new("col", DataType::Utf8, false), + ]))), + }, + projection: None, + limit: None, + table_partition_cols: vec![], + output_ordering: vec![], + }; + + #[derive(Debug, Hash, Clone)] + struct CustomPredicateExpr { + inner: Arc, + } + impl Display for CustomPredicateExpr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "CustomPredicateExpr") + } + } + impl PartialEq for CustomPredicateExpr { + fn eq(&self, other: &dyn Any) -> bool { + down_cast_any_ref(other) + .downcast_ref::() + .map(|x| self.inner.eq(&x.inner)) + .unwrap_or(false) + } + } + impl PhysicalExpr for CustomPredicateExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result { + unreachable!() + } + + fn nullable(&self, _input_schema: &Schema) -> Result { + unreachable!() + } + + fn evaluate(&self, _batch: &RecordBatch) -> Result { + unreachable!() + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.inner] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + todo!() + } + + fn dyn_hash(&self, _state: &mut dyn Hasher) { + unreachable!() + } + } + + #[derive(Debug)] + struct CustomPhysicalExtensionCodec; + impl PhysicalExtensionCodec for CustomPhysicalExtensionCodec { + fn try_decode( + &self, + _buf: &[u8], + _inputs: &[Arc], + _registry: &dyn FunctionRegistry, + ) -> Result> { + unreachable!() + } + + fn try_encode( + &self, + _node: Arc, + _buf: &mut Vec, + ) -> Result<()> { + unreachable!() + } + + fn try_decode_expr( + &self, + buf: &[u8], + inputs: &[Arc], + ) -> Result> { + if buf == "CustomPredicateExpr".as_bytes() { + 
Ok(Arc::new(CustomPredicateExpr { + inner: inputs[0].clone(), + })) + } else { + internal_err!("Not supported") + } + } + + fn try_encode_expr( + &self, + node: Arc, + buf: &mut Vec, + ) -> Result<()> { + if node + .as_ref() + .as_any() + .downcast_ref::() + .is_some() + { + buf.extend_from_slice("CustomPredicateExpr".as_bytes()); + Ok(()) + } else { + internal_err!("Not supported") + } + } + } + + let custom_predicate_expr = Arc::new(CustomPredicateExpr { + inner: Arc::new(Column::new("col", 1)), + }); + let exec_plan = ParquetExec::builder(scan_config) + .with_predicate(custom_predicate_expr) + .build_arc(); + + let ctx = SessionContext::new(); + roundtrip_test_and_return(exec_plan, &ctx, &CustomPhysicalExtensionCodec {})?; + Ok(()) +} + #[test] fn roundtrip_scalar_udf() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); From a43cf79bf0b133379ee6f2a236c025e59a5ef822 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Sun, 14 Jul 2024 05:45:03 +0800 Subject: [PATCH 048/357] remove termtree dependency (#11416) * remove termtree dependency * impl Display for TopKHeap, replace uses of tree_print in tests * use to_string instead of format! --- datafusion/physical-plan/Cargo.toml | 1 - .../physical-plan/src/aggregates/topk/heap.rs | 86 ++++++++++++------- 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index f5f756417ebf8..00fc81ebde978 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -66,7 +66,6 @@ tokio = { workspace = true } [dev-dependencies] rstest = { workspace = true } rstest_reuse = "0.7.0" -termtree = "0.5.0" tokio = { workspace = true, features = [ "rt-multi-thread", "fs", diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 51593f5c28cef..81eadbc018b34 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -27,7 +27,7 @@ use datafusion_common::Result; use datafusion_physical_expr::aggregate::utils::adjust_output_array; use half::f16; use std::cmp::Ordering; -use std::fmt::{Debug, Formatter}; +use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; /// A custom version of `Ord` that only exists to we can implement it for the Values in our heap @@ -323,29 +323,53 @@ impl TopKHeap { } } - #[cfg(test)] - fn _tree_print(&self, idx: usize) -> Option> { - let hi = self.heap.get(idx)?; - match hi { - None => None, - Some(hi) => { - let label = - format!("val={:?} idx={}, bucket={}", hi.val, idx, hi.map_idx); - let left = self._tree_print(idx * 2 + 1); - let right = self._tree_print(idx * 2 + 2); - let children = left.into_iter().chain(right); - let me = termtree::Tree::new(label).with_leaves(children); - Some(me) + fn _tree_print( + &self, + idx: usize, + prefix: String, + is_tail: bool, + output: &mut String, + ) { + if let Some(Some(hi)) = self.heap.get(idx) { + let connector = if idx != 0 { + if is_tail { + "└── " + } else { + "├── " + } + } else { + "" + }; + output.push_str(&format!( + "{}{}val={:?} idx={}, bucket={}\n", + prefix, connector, hi.val, idx, hi.map_idx + )); + let new_prefix = if is_tail { "" } else { "│ " }; + let child_prefix = format!("{}{}", prefix, new_prefix); + + let left_idx = idx * 2 + 1; + let right_idx = idx * 2 + 2; + + let left_exists = left_idx < self.len; + let right_exists = right_idx < self.len; + + if left_exists { 
+ self._tree_print(left_idx, child_prefix.clone(), !right_exists, output); + } + if right_exists { + self._tree_print(right_idx, child_prefix, true, output); } } } +} - #[cfg(test)] - fn tree_print(&self) -> String { - match self._tree_print(0) { - None => "".to_string(), - Some(root) => format!("{}", root), +impl Display for TopKHeap { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + if self.heap.first().is_some() { + self._tree_print(0, String::new(), true, &mut output); } + write!(f, "{}", output) } } @@ -361,9 +385,9 @@ impl HeapItem { impl Debug for HeapItem { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("bucket=")?; - self.map_idx.fmt(f)?; + Debug::fmt(&self.map_idx, f)?; f.write_str(" val=")?; - self.val.fmt(f)?; + Debug::fmt(&self.val, f)?; f.write_str("\n")?; Ok(()) } @@ -462,7 +486,7 @@ mod tests { let mut heap = TopKHeap::new(10, false); heap.append_or_replace(1, 1, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=1 idx=0, bucket=1 "#; @@ -482,7 +506,7 @@ val=1 idx=0, bucket=1 heap.append_or_replace(2, 2, &mut map); assert_eq!(map, vec![(2, 0), (1, 1)]); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -500,7 +524,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=3 idx=0, bucket=3 ├── val=1 idx=1, bucket=1 @@ -510,7 +534,7 @@ val=3 idx=0, bucket=3 let mut map = vec![]; heap.append_or_replace(0, 0, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 ├── val=1 idx=1, bucket=1 @@ -531,7 +555,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); heap.append_or_replace(4, 4, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=4 idx=0, bucket=4 ├── val=3 idx=1, bucket=3 @@ -542,7 +566,7 @@ val=4 idx=0, bucket=4 let mut map = vec![]; heap.replace_if_better(1, 0, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=4 idx=0, bucket=4 ├── val=1 idx=1, bucket=1 @@ -563,7 +587,7 @@ val=4 idx=0, bucket=4 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -584,7 +608,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -607,7 +631,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -616,7 +640,7 @@ val=2 idx=0, bucket=2 let numbers = vec![(0, 1), (1, 2)]; heap.renumber(numbers.as_slice()); - let actual = heap.tree_print(); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=1 └── val=1 idx=1, bucket=2 From a7041feff32c2af09854c144a760d945e30fb38a Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sun, 14 Jul 2024 05:47:47 +0800 
Subject: [PATCH 049/357] Minor: Add an example for backtrace pretty print (#11450) * add the example for printing backtrace pretty * add empty end line * fix prettier * sync the usage example * Update docs/source/user-guide/crate-configuration.md Co-authored-by: Oleks V --------- Co-authored-by: Oleks V --- docs/source/user-guide/crate-configuration.md | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/crate-configuration.md b/docs/source/user-guide/crate-configuration.md index 0587d06a39191..9d22e3403097f 100644 --- a/docs/source/user-guide/crate-configuration.md +++ b/docs/source/user-guide/crate-configuration.md @@ -121,7 +121,7 @@ backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace The backtraces are useful when debugging code. If there is a test in `datafusion/core/src/physical_planner.rs` -``` +```rust #[tokio::test] async fn test_get_backtrace_for_failed_code() -> Result<()> { let ctx = SessionContext::new(); @@ -141,6 +141,48 @@ To obtain a backtrace: ```bash cargo build --features=backtrace RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture + +running 1 test +Error: Plan("Invalid function 'row_numer'.\nDid you mean 'ROW_NUMBER'?\n\nbacktrace: 0: std::backtrace_rs::backtrace::libunwind::trace\n at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/libunwind.rs:105:5\n 1: std::backtrace_rs::backtrace::trace_unsynchronized\n... ``` Note: The backtrace wrapped into systems calls, so some steps on top of the backtrace can be ignored + +To show the backtrace in a pretty-printed format use `eprintln!("{e}");`. + +```rust +#[tokio::test] +async fn test_get_backtrace_for_failed_code() -> Result<()> { + let ctx = SessionContext::new(); + + let sql = "select row_numer() over (partition by a order by a) from (select 1 a);"; + + let _ = match ctx.sql(sql).await { + Ok(result) => result.show().await?, + Err(e) => { + eprintln!("{e}"); + } + }; + + Ok(()) +} +``` + +Then run the test: + +```bash +$ RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture + +running 1 test +Error during planning: Invalid function 'row_numer'. +Did you mean 'ROW_NUMBER'? + +backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace + at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/libunwind.rs:105:5 + 1: std::backtrace_rs::backtrace::trace_unsynchronized + at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5 + 2: std::backtrace::Backtrace::create + at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/backtrace.rs:331:13 + 3: std::backtrace::Backtrace::capture + ... +``` From 84758062f808f97ba3b7e9d8a9d3839df4c39d98 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 14 Jul 2024 15:00:31 -0400 Subject: [PATCH 050/357] Add SessionStateBuilder and extract out the registration of defaults (#11403) * Create a SessionStateBuilder and use it for creating anything but a basic SessionState. * Updated new_from_existing to take a reference to the existing SessionState and clone it. * Minor documentation update. * SessionStateDefaults improvements. * Reworked how SessionStateBuilder works from PR feedback. * Bug fix for missing array_expressions cfg feature. 
* Review feedback updates + doc fixes for SessionStateDefaults * Cargo fmt update. --- datafusion-cli/src/catalog.rs | 11 +- .../examples/custom_file_format.rs | 9 +- .../core/src/datasource/file_format/csv.rs | 7 +- datafusion/core/src/execution/context/mod.rs | 25 +- .../core/src/execution/session_state.rs | 965 ++++++++++++++---- datafusion/core/src/physical_planner.rs | 7 +- datafusion/core/src/test/object_store.rs | 8 +- datafusion/core/tests/dataframe/mod.rs | 19 +- datafusion/core/tests/memory_limit/mod.rs | 14 +- .../core/tests/parquet/file_statistics.rs | 6 +- datafusion/core/tests/sql/create_drop.rs | 13 +- .../tests/user_defined/user_defined_plan.rs | 11 +- .../tests/cases/roundtrip_logical_plan.rs | 8 +- .../tests/cases/roundtrip_logical_plan.rs | 13 +- 14 files changed, 884 insertions(+), 232 deletions(-) diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index c11eb3280c20f..b83f659756105 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -29,6 +29,7 @@ use datafusion::datasource::listing::{ use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::execution::context::SessionState; +use datafusion::execution::session_state::SessionStateBuilder; use async_trait::async_trait; use dirs::home_dir; @@ -162,6 +163,7 @@ impl SchemaProvider for DynamicFileSchemaProvider { .ok_or_else(|| plan_datafusion_err!("locking error"))? .read() .clone(); + let mut builder = SessionStateBuilder::from(state.clone()); let optimized_name = substitute_tilde(name.to_owned()); let table_url = ListingTableUrl::parse(optimized_name.as_str())?; let scheme = table_url.scheme(); @@ -178,13 +180,18 @@ impl SchemaProvider for DynamicFileSchemaProvider { // to any command options so the only choice is to use an empty collection match scheme { "s3" | "oss" | "cos" => { - state = state.add_table_options_extension(AwsOptions::default()); + if let Some(table_options) = builder.table_options() { + table_options.extensions.insert(AwsOptions::default()) + } } "gs" | "gcs" => { - state = state.add_table_options_extension(GcpOptions::default()) + if let Some(table_options) = builder.table_options() { + table_options.extensions.insert(GcpOptions::default()) + } } _ => {} }; + state = builder.build(); let store = get_object_store( &state, table_url.scheme(), diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index fe936418bce4a..bdb702375c945 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -22,6 +22,7 @@ use arrow::{ datatypes::UInt64Type, }; use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::{ datasource::{ file_format::{ @@ -32,9 +33,9 @@ use datafusion::{ MemTable, }, error::Result, - execution::{context::SessionState, runtime_env::RuntimeEnv}, + execution::context::SessionState, physical_plan::ExecutionPlan, - prelude::{SessionConfig, SessionContext}, + prelude::SessionContext, }; use datafusion_common::{GetExt, Statistics}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; @@ -176,9 +177,7 @@ impl GetExt for TSVFileFactory { #[tokio::main] async fn main() -> Result<()> { // Create a new context with the default configuration - let config = SessionConfig::new(); - let runtime = RuntimeEnv::default(); - let mut state = SessionState::new_with_config_rt(config, Arc::new(runtime)); + let mut 
state = SessionStateBuilder::new().with_default_features().build(); // Register the custom file format let file_format = Arc::new(TSVFileFactory::new()); diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 92cb11e2b47a4..baeaf51fb56d1 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -632,6 +632,7 @@ mod tests { use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::{col, lit}; + use crate::execution::session_state::SessionStateBuilder; use chrono::DateTime; use object_store::local::LocalFileSystem; use object_store::path::Path; @@ -814,7 +815,11 @@ mod tests { let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap()); let mut cfg = SessionConfig::new(); cfg.options_mut().catalog.has_header = true; - let session_state = SessionState::new_with_config_rt(cfg, runtime); + let session_state = SessionStateBuilder::new() + .with_config(cfg) + .with_runtime_env(runtime) + .with_default_features() + .build(); let integration = LocalFileSystem::new_with_prefix(arrow_test_data()).unwrap(); let path = Path::from("csv/aggregate_test_100.csv"); let csv = CsvFormat::default().with_has_header(true); diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 4b9e3e843341a..640a9b14a65f1 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -73,6 +73,7 @@ use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; +use crate::execution::session_state::SessionStateBuilder; pub use datafusion_execution::config::SessionConfig; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; @@ -294,7 +295,11 @@ impl SessionContext { /// all `SessionContext`'s should be configured with the /// same `RuntimeEnv`. 
pub fn new_with_config_rt(config: SessionConfig, runtime: Arc) -> Self { - let state = SessionState::new_with_config_rt(config, runtime); + let state = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build(); Self::new_with_state(state) } @@ -315,7 +320,7 @@ impl SessionContext { } /// Creates a new `SessionContext` using the provided [`SessionState`] - #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_state")] + #[deprecated(since = "32.0.0", note = "Use SessionContext::new_with_state")] pub fn with_state(state: SessionState) -> Self { Self::new_with_state(state) } @@ -1574,6 +1579,7 @@ mod tests { use datafusion_common_runtime::SpawnedTask; use crate::catalog::schema::SchemaProvider; + use crate::execution::session_state::SessionStateBuilder; use crate::physical_planner::PhysicalPlanner; use async_trait::async_trait; use tempfile::TempDir; @@ -1707,7 +1713,11 @@ mod tests { .set_str("datafusion.catalog.location", url.as_str()) .set_str("datafusion.catalog.format", "CSV") .set_str("datafusion.catalog.has_header", "true"); - let session_state = SessionState::new_with_config_rt(cfg, runtime); + let session_state = SessionStateBuilder::new() + .with_config(cfg) + .with_runtime_env(runtime) + .with_default_features() + .build(); let ctx = SessionContext::new_with_state(session_state); ctx.refresh_catalogs().await?; @@ -1733,9 +1743,12 @@ mod tests { #[tokio::test] async fn custom_query_planner() -> Result<()> { let runtime = Arc::new(RuntimeEnv::default()); - let session_state = - SessionState::new_with_config_rt(SessionConfig::new(), runtime) - .with_query_planner(Arc::new(MyQueryPlanner {})); + let session_state = SessionStateBuilder::new() + .with_config(SessionConfig::new()) + .with_runtime_env(runtime) + .with_default_features() + .with_query_planner(Arc::new(MyQueryPlanner {})) + .build(); let ctx = SessionContext::new_with_state(session_state); let df = ctx.sql("SELECT 1").await?; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index dbfba9ea93521..75eef43454873 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -77,6 +77,8 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::ExecutionPlan; use datafusion_sql::parser::{DFParser, Statement}; use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}; +use itertools::Itertools; +use log::{debug, info}; use sqlparser::ast::Expr as SQLExpr; use sqlparser::dialect::dialect_from_str; use std::collections::hash_map::Entry; @@ -89,9 +91,29 @@ use uuid::Uuid; /// Execution context for registering data sources and executing queries. /// See [`SessionContext`] for a higher level API. /// +/// Use the [`SessionStateBuilder`] to build a SessionState object. 
+/// +/// ``` +/// use datafusion::prelude::*; +/// # use datafusion::{error::Result, assert_batches_eq}; +/// # use datafusion::execution::session_state::SessionStateBuilder; +/// # use datafusion_execution::runtime_env::RuntimeEnv; +/// # use std::sync::Arc; +/// # #[tokio::main] +/// # async fn main() -> Result<()> { +/// let state = SessionStateBuilder::new() +/// .with_config(SessionConfig::new()) +/// .with_runtime_env(Arc::new(RuntimeEnv::default())) +/// .with_default_features() +/// .build(); +/// Ok(()) +/// # } +/// ``` +/// /// Note that there is no `Default` or `new()` for SessionState, /// to avoid accidentally running queries or other operations without passing through -/// the [`SessionConfig`] or [`RuntimeEnv`]. See [`SessionContext`]. +/// the [`SessionConfig`] or [`RuntimeEnv`]. See [`SessionStateBuilder`] and +/// [`SessionContext`]. /// /// [`SessionContext`]: crate::execution::context::SessionContext #[derive(Clone)] @@ -140,7 +162,6 @@ pub struct SessionState { table_factories: HashMap>, /// Runtime environment runtime_env: Arc, - /// [FunctionFactory] to support pluggable user defined function handler. /// /// It will be invoked on `CREATE FUNCTION` statements. @@ -153,6 +174,7 @@ impl Debug for SessionState { f.debug_struct("SessionState") .field("session_id", &self.session_id) .field("analyzer", &"...") + .field("expr_planners", &"...") .field("optimizer", &"...") .field("physical_optimizers", &"...") .field("query_planner", &"...") @@ -175,193 +197,56 @@ impl Debug for SessionState { impl SessionState { /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] pub fn new_with_config_rt(config: SessionConfig, runtime: Arc) -> Self { - let catalog_list = - Arc::new(MemoryCatalogProviderList::new()) as Arc; - Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) + SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build() } /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. 
- #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_config_rt")] + #[deprecated(since = "32.0.0", note = "Use SessionStateBuilder")] pub fn with_config_rt(config: SessionConfig, runtime: Arc) -> Self { - Self::new_with_config_rt(config, runtime) + SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build() } /// Returns new [`SessionState`] using the provided /// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogProviderList`] + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] pub fn new_with_config_rt_and_catalog_list( config: SessionConfig, runtime: Arc, catalog_list: Arc, ) -> Self { - let session_id = Uuid::new_v4().to_string(); - - // Create table_factories for all default formats - let mut table_factories: HashMap> = - HashMap::new(); - #[cfg(feature = "parquet")] - table_factories.insert("PARQUET".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("CSV".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("JSON".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("NDJSON".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("AVRO".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("ARROW".into(), Arc::new(DefaultTableFactory::new())); - - if config.create_default_catalog_and_schema() { - let default_catalog = MemoryCatalogProvider::new(); - - default_catalog - .register_schema( - &config.options().catalog.default_schema, - Arc::new(MemorySchemaProvider::new()), - ) - .expect("memory catalog provider can register schema"); - - Self::register_default_schema( - &config, - &table_factories, - &runtime, - &default_catalog, - ); - - catalog_list.register_catalog( - config.options().catalog.default_catalog.clone(), - Arc::new(default_catalog), - ); - } - - let expr_planners: Vec> = vec![ - Arc::new(functions::core::planner::CoreFunctionPlanner::default()), - // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::ArrayFunctionPlanner), - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::FieldAccessPlanner), - #[cfg(any( - feature = "datetime_expressions", - feature = "unicode_expressions" - ))] - Arc::new(functions::planner::UserDefinedFunctionPlanner), - ]; - - let mut new_self = SessionState { - session_id, - analyzer: Analyzer::new(), - expr_planners, - optimizer: Optimizer::new(), - physical_optimizers: PhysicalOptimizer::new(), - query_planner: Arc::new(DefaultQueryPlanner {}), - catalog_list, - table_functions: HashMap::new(), - scalar_functions: HashMap::new(), - aggregate_functions: HashMap::new(), - window_functions: HashMap::new(), - serializer_registry: Arc::new(EmptySerializerRegistry), - file_formats: HashMap::new(), - table_options: TableOptions::default_from_session_config(config.options()), - config, - execution_props: ExecutionProps::new(), - runtime_env: runtime, - table_factories, - function_factory: None, - }; - - #[cfg(feature = "parquet")] - if let Err(e) = - new_self.register_file_format(Arc::new(ParquetFormatFactory::new()), false) - { - log::info!("Unable to register default ParquetFormat: {e}") - }; - - if let Err(e) = - new_self.register_file_format(Arc::new(JsonFormatFactory::new()), false) - { - log::info!("Unable to register default JsonFormat: {e}") - }; - - if let Err(e) = - new_self.register_file_format(Arc::new(CsvFormatFactory::new()), false) - { - log::info!("Unable to 
register default CsvFormat: {e}") - }; - - if let Err(e) = - new_self.register_file_format(Arc::new(ArrowFormatFactory::new()), false) - { - log::info!("Unable to register default ArrowFormat: {e}") - }; - - if let Err(e) = - new_self.register_file_format(Arc::new(AvroFormatFactory::new()), false) - { - log::info!("Unable to register default AvroFormat: {e}") - }; - - // register built in functions - functions::register_all(&mut new_self) - .expect("can not register built in functions"); - - // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - functions_array::register_all(&mut new_self) - .expect("can not register array expressions"); - - functions_aggregate::register_all(&mut new_self) - .expect("can not register aggregate functions"); - - new_self + SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_catalog_list(catalog_list) + .with_default_features() + .build() } + /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. - #[deprecated( - since = "32.0.0", - note = "Use SessionState::new_with_config_rt_and_catalog_list" - )] + #[deprecated(since = "32.0.0", note = "Use SessionStateBuilder")] pub fn with_config_rt_and_catalog_list( config: SessionConfig, runtime: Arc, catalog_list: Arc, ) -> Self { - Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) - } - fn register_default_schema( - config: &SessionConfig, - table_factories: &HashMap>, - runtime: &Arc, - default_catalog: &MemoryCatalogProvider, - ) { - let url = config.options().catalog.location.as_ref(); - let format = config.options().catalog.format.as_ref(); - let (url, format) = match (url, format) { - (Some(url), Some(format)) => (url, format), - _ => return, - }; - let url = url.to_string(); - let format = format.to_string(); - - let url = Url::parse(url.as_str()).expect("Invalid default catalog location!"); - let authority = match url.host_str() { - Some(host) => format!("{}://{}", url.scheme(), host), - None => format!("{}://", url.scheme()), - }; - let path = &url.as_str()[authority.len()..]; - let path = object_store::path::Path::parse(path).expect("Can't parse path"); - let store = ObjectStoreUrl::parse(authority.as_str()) - .expect("Invalid default catalog url"); - let store = match runtime.object_store(store) { - Ok(store) => store, - _ => return, - }; - let factory = match table_factories.get(format.as_str()) { - Some(factory) => factory, - _ => return, - }; - let schema = - ListingSchemaProvider::new(authority, path, factory.clone(), store, format); - let _ = default_catalog - .register_schema("default", Arc::new(schema)) - .expect("Failed to register default schema"); + SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_catalog_list(catalog_list) + .with_default_features() + .build() } pub(crate) fn resolve_table_ref( @@ -400,12 +285,14 @@ impl SessionState { }) } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Replace the random session id. pub fn with_session_id(mut self, session_id: String) -> Self { self.session_id = session_id; self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// override default query planner with `query_planner` pub fn with_query_planner( mut self, @@ -415,6 +302,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Override the [`AnalyzerRule`]s optimizer plan rules. 
pub fn with_analyzer_rules( mut self, @@ -424,6 +312,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Replace the entire list of [`OptimizerRule`]s used to optimize plans pub fn with_optimizer_rules( mut self, @@ -433,6 +322,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Replace the entire list of [`PhysicalOptimizerRule`]s used to optimize plans pub fn with_physical_optimizer_rules( mut self, @@ -452,6 +342,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Add `optimizer_rule` to the end of the list of /// [`OptimizerRule`]s used to rewrite queries. pub fn add_optimizer_rule( @@ -472,6 +363,7 @@ impl SessionState { self.optimizer.rules.push(optimizer_rule); } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Add `physical_optimizer_rule` to the end of the list of /// [`PhysicalOptimizerRule`]s used to rewrite queries. pub fn add_physical_optimizer_rule( @@ -482,6 +374,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Adds a new [`ConfigExtension`] to TableOptions pub fn add_table_options_extension( mut self, @@ -491,6 +384,7 @@ impl SessionState { self } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn with_function_factory( mut self, @@ -505,6 +399,7 @@ impl SessionState { self.function_factory = Some(function_factory); } + #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] /// Replace the extension [`SerializerRegistry`] pub fn with_serializer_registry( mut self, @@ -858,19 +753,20 @@ impl SessionState { &self.table_options } - /// Return mutable table opptions + /// Return mutable table options pub fn table_options_mut(&mut self) -> &mut TableOptions { &mut self.table_options } - /// Registers a [`ConfigExtension`] as a table option extention that can be + /// Registers a [`ConfigExtension`] as a table option extension that can be /// referenced from SQL statements executed against this context. pub fn register_table_options_extension(&mut self, extension: T) { self.table_options.extensions.insert(extension) } - /// Adds or updates a [FileFormatFactory] which can be used with COPY TO or CREATE EXTERNAL TABLE statements for reading - /// and writing files of custom formats. + /// Adds or updates a [FileFormatFactory] which can be used with COPY TO or + /// CREATE EXTERNAL TABLE statements for reading and writing files of custom + /// formats. pub fn register_file_format( &mut self, file_format: Arc, @@ -950,7 +846,7 @@ impl SessionState { ); } - /// Deregsiter a user defined table function + /// Deregister a user defined table function pub fn deregister_udtf( &mut self, name: &str, @@ -974,6 +870,733 @@ impl SessionState { } } +/// A builder to be used for building [`SessionState`]'s. Defaults will +/// be used for all values unless explicitly provided. 
+/// +/// See example on [`SessionState`] +pub struct SessionStateBuilder { + session_id: Option, + analyzer: Option, + expr_planners: Option>>, + optimizer: Option, + physical_optimizers: Option, + query_planner: Option>, + catalog_list: Option>, + table_functions: Option>>, + scalar_functions: Option>>, + aggregate_functions: Option>>, + window_functions: Option>>, + serializer_registry: Option>, + file_formats: Option>>, + config: Option, + table_options: Option, + execution_props: Option, + table_factories: Option>>, + runtime_env: Option>, + function_factory: Option>, + // fields to support convenience functions + analyzer_rules: Option>>, + optimizer_rules: Option>>, + physical_optimizer_rules: Option>>, +} + +impl SessionStateBuilder { + /// Returns a new [`SessionStateBuilder`] with no options set. + pub fn new() -> Self { + Self { + session_id: None, + analyzer: None, + expr_planners: None, + optimizer: None, + physical_optimizers: None, + query_planner: None, + catalog_list: None, + table_functions: None, + scalar_functions: None, + aggregate_functions: None, + window_functions: None, + serializer_registry: None, + file_formats: None, + table_options: None, + config: None, + execution_props: None, + table_factories: None, + runtime_env: None, + function_factory: None, + // fields to support convenience functions + analyzer_rules: None, + optimizer_rules: None, + physical_optimizer_rules: None, + } + } + + /// Returns a new [SessionStateBuilder] based on an existing [SessionState] + /// The session id for the new builder will be unset; all other fields will + /// be cloned from what is set in the provided session state + pub fn new_from_existing(existing: SessionState) -> Self { + Self { + session_id: None, + analyzer: Some(existing.analyzer), + expr_planners: Some(existing.expr_planners), + optimizer: Some(existing.optimizer), + physical_optimizers: Some(existing.physical_optimizers), + query_planner: Some(existing.query_planner), + catalog_list: Some(existing.catalog_list), + table_functions: Some(existing.table_functions), + scalar_functions: Some(existing.scalar_functions.into_values().collect_vec()), + aggregate_functions: Some( + existing.aggregate_functions.into_values().collect_vec(), + ), + window_functions: Some(existing.window_functions.into_values().collect_vec()), + serializer_registry: Some(existing.serializer_registry), + file_formats: Some(existing.file_formats.into_values().collect_vec()), + config: Some(existing.config), + table_options: Some(existing.table_options), + execution_props: Some(existing.execution_props), + table_factories: Some(existing.table_factories), + runtime_env: Some(existing.runtime_env), + function_factory: existing.function_factory, + + // fields to support convenience functions + analyzer_rules: None, + optimizer_rules: None, + physical_optimizer_rules: None, + } + } + + /// Set defaults for table_factories, file formats, expr_planners and builtin + /// scalar and aggregate functions. + pub fn with_default_features(mut self) -> Self { + self.table_factories = Some(SessionStateDefaults::default_table_factories()); + self.file_formats = Some(SessionStateDefaults::default_file_formats()); + self.expr_planners = Some(SessionStateDefaults::default_expr_planners()); + self.scalar_functions = Some(SessionStateDefaults::default_scalar_functions()); + self.aggregate_functions = + Some(SessionStateDefaults::default_aggregate_functions()); + self + } + + /// Set the session id. 
+    pub fn with_session_id(mut self, session_id: String) -> Self {
+        self.session_id = Some(session_id);
+        self
+    }
+
+    /// Set the [`AnalyzerRule`]s optimizer plan rules.
+    pub fn with_analyzer_rules(
+        mut self,
+        rules: Vec>,
+    ) -> Self {
+        self.analyzer = Some(Analyzer::with_rules(rules));
+        self
+    }
+
+    /// Add `analyzer_rule` to the end of the list of
+    /// [`AnalyzerRule`]s used to rewrite queries.
+    pub fn with_analyzer_rule(
+        mut self,
+        analyzer_rule: Arc,
+    ) -> Self {
+        let mut rules = self.analyzer_rules.unwrap_or_default();
+        rules.push(analyzer_rule);
+        self.analyzer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`OptimizerRule`]s used to optimize plans.
+    pub fn with_optimizer_rules(
+        mut self,
+        rules: Vec>,
+    ) -> Self {
+        self.optimizer = Some(Optimizer::with_rules(rules));
+        self
+    }
+
+    /// Add `optimizer_rule` to the end of the list of
+    /// [`OptimizerRule`]s used to rewrite queries.
+    pub fn with_optimizer_rule(
+        mut self,
+        optimizer_rule: Arc,
+    ) -> Self {
+        let mut rules = self.optimizer_rules.unwrap_or_default();
+        rules.push(optimizer_rule);
+        self.optimizer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`ExprPlanner`]s used to customize the behavior of the SQL planner.
+    pub fn with_expr_planners(
+        mut self,
+        expr_planners: Vec>,
+    ) -> Self {
+        self.expr_planners = Some(expr_planners);
+        self
+    }
+
+    /// Set the [`PhysicalOptimizerRule`]s used to optimize plans.
+    pub fn with_physical_optimizer_rules(
+        mut self,
+        physical_optimizers: Vec>,
+    ) -> Self {
+        self.physical_optimizers =
+            Some(PhysicalOptimizer::with_rules(physical_optimizers));
+        self
+    }
+
+    /// Add `physical_optimizer_rule` to the end of the list of
+    /// [`PhysicalOptimizerRule`]s used to rewrite queries.
+    pub fn with_physical_optimizer_rule(
+        mut self,
+        physical_optimizer_rule: Arc,
+    ) -> Self {
+        let mut rules = self.physical_optimizer_rules.unwrap_or_default();
+        rules.push(physical_optimizer_rule);
+        self.physical_optimizer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`QueryPlanner`]
+    pub fn with_query_planner(
+        mut self,
+        query_planner: Arc,
+    ) -> Self {
+        self.query_planner = Some(query_planner);
+        self
+    }
+
+    /// Set the [`CatalogProviderList`]
+    pub fn with_catalog_list(
+        mut self,
+        catalog_list: Arc,
+    ) -> Self {
+        self.catalog_list = Some(catalog_list);
+        self
+    }
+
+    /// Set the map of [`TableFunction`]s
+    pub fn with_table_functions(
+        mut self,
+        table_functions: HashMap>,
+    ) -> Self {
+        self.table_functions = Some(table_functions);
+        self
+    }
+
+    /// Set the list of [`ScalarUDF`]s
+    pub fn with_scalar_functions(
+        mut self,
+        scalar_functions: Vec>,
+    ) -> Self {
+        self.scalar_functions = Some(scalar_functions);
+        self
+    }
+
+    /// Set the list of [`AggregateUDF`]s
+    pub fn with_aggregate_functions(
+        mut self,
+        aggregate_functions: Vec>,
+    ) -> Self {
+        self.aggregate_functions = Some(aggregate_functions);
+        self
+    }
+
+    /// Set the list of [`WindowUDF`]s
+    pub fn with_window_functions(
+        mut self,
+        window_functions: Vec>,
+    ) -> Self {
+        self.window_functions = Some(window_functions);
+        self
+    }
+
+    /// Set the [`SerializerRegistry`]
+    pub fn with_serializer_registry(
+        mut self,
+        serializer_registry: Arc,
+    ) -> Self {
+        self.serializer_registry = Some(serializer_registry);
+        self
+    }
+
+    /// Set the list of [`FileFormatFactory`]s
+    pub fn with_file_formats(
+        mut self,
+        file_formats: Vec>,
+    ) -> Self {
+        self.file_formats = Some(file_formats);
+        self
+    }
+
+    /// Set the [`SessionConfig`]
+    pub fn with_config(mut self, config: SessionConfig) -> Self
{ + self.config = Some(config); + self + } + + /// Set the [`TableOptions`] + pub fn with_table_options(mut self, table_options: TableOptions) -> Self { + self.table_options = Some(table_options); + self + } + + /// Set the [`ExecutionProps`] + pub fn with_execution_props(mut self, execution_props: ExecutionProps) -> Self { + self.execution_props = Some(execution_props); + self + } + + /// Set the map of [`TableProviderFactory`]s + pub fn with_table_factories( + mut self, + table_factories: HashMap>, + ) -> Self { + self.table_factories = Some(table_factories); + self + } + + /// Set the [`RuntimeEnv`] + pub fn with_runtime_env(mut self, runtime_env: Arc) -> Self { + self.runtime_env = Some(runtime_env); + self + } + + /// Set a [`FunctionFactory`] to handle `CREATE FUNCTION` statements + pub fn with_function_factory( + mut self, + function_factory: Option>, + ) -> Self { + self.function_factory = function_factory; + self + } + + /// Builds a [`SessionState`] with the current configuration. + /// + /// Note that there is an explicit option for enabling catalog and schema defaults + /// in [SessionConfig::create_default_catalog_and_schema] which if enabled + /// will be built here. + pub fn build(self) -> SessionState { + let Self { + session_id, + analyzer, + expr_planners, + optimizer, + physical_optimizers, + query_planner, + catalog_list, + table_functions, + scalar_functions, + aggregate_functions, + window_functions, + serializer_registry, + file_formats, + table_options, + config, + execution_props, + table_factories, + runtime_env, + function_factory, + analyzer_rules, + optimizer_rules, + physical_optimizer_rules, + } = self; + + let config = config.unwrap_or_default(); + let runtime_env = runtime_env.unwrap_or(Arc::new(RuntimeEnv::default())); + + let mut state = SessionState { + session_id: session_id.unwrap_or(Uuid::new_v4().to_string()), + analyzer: analyzer.unwrap_or_default(), + expr_planners: expr_planners.unwrap_or_default(), + optimizer: optimizer.unwrap_or_default(), + physical_optimizers: physical_optimizers.unwrap_or_default(), + query_planner: query_planner.unwrap_or(Arc::new(DefaultQueryPlanner {})), + catalog_list: catalog_list + .unwrap_or(Arc::new(MemoryCatalogProviderList::new()) + as Arc), + table_functions: table_functions.unwrap_or_default(), + scalar_functions: HashMap::new(), + aggregate_functions: HashMap::new(), + window_functions: HashMap::new(), + serializer_registry: serializer_registry + .unwrap_or(Arc::new(EmptySerializerRegistry)), + file_formats: HashMap::new(), + table_options: table_options + .unwrap_or(TableOptions::default_from_session_config(config.options())), + config, + execution_props: execution_props.unwrap_or_default(), + table_factories: table_factories.unwrap_or_default(), + runtime_env, + function_factory, + }; + + if let Some(file_formats) = file_formats { + for file_format in file_formats { + if let Err(e) = state.register_file_format(file_format, false) { + info!("Unable to register file format: {e}") + }; + } + } + + if let Some(scalar_functions) = scalar_functions { + scalar_functions.into_iter().for_each(|udf| { + let existing_udf = state.register_udf(udf); + if let Ok(Some(existing_udf)) = existing_udf { + debug!("Overwrote an existing UDF: {}", existing_udf.name()); + } + }); + } + + if let Some(aggregate_functions) = aggregate_functions { + aggregate_functions.into_iter().for_each(|udaf| { + let existing_udf = state.register_udaf(udaf); + if let Ok(Some(existing_udf)) = existing_udf { + debug!("Overwrote an existing UDF: {}", 
existing_udf.name()); + } + }); + } + + if let Some(window_functions) = window_functions { + window_functions.into_iter().for_each(|udwf| { + let existing_udf = state.register_udwf(udwf); + if let Ok(Some(existing_udf)) = existing_udf { + debug!("Overwrote an existing UDF: {}", existing_udf.name()); + } + }); + } + + if state.config.create_default_catalog_and_schema() { + let default_catalog = SessionStateDefaults::default_catalog( + &state.config, + &state.table_factories, + &state.runtime_env, + ); + + state.catalog_list.register_catalog( + state.config.options().catalog.default_catalog.clone(), + Arc::new(default_catalog), + ); + } + + if let Some(analyzer_rules) = analyzer_rules { + for analyzer_rule in analyzer_rules { + state.analyzer.rules.push(analyzer_rule); + } + } + + if let Some(optimizer_rules) = optimizer_rules { + for optimizer_rule in optimizer_rules { + state.optimizer.rules.push(optimizer_rule); + } + } + + if let Some(physical_optimizer_rules) = physical_optimizer_rules { + for physical_optimizer_rule in physical_optimizer_rules { + state + .physical_optimizers + .rules + .push(physical_optimizer_rule); + } + } + + state + } + + /// Returns the current session_id value + pub fn session_id(&self) -> &Option { + &self.session_id + } + + /// Returns the current analyzer value + pub fn analyzer(&mut self) -> &mut Option { + &mut self.analyzer + } + + /// Returns the current expr_planners value + pub fn expr_planners(&mut self) -> &mut Option>> { + &mut self.expr_planners + } + + /// Returns the current optimizer value + pub fn optimizer(&mut self) -> &mut Option { + &mut self.optimizer + } + + /// Returns the current physical_optimizers value + pub fn physical_optimizers(&mut self) -> &mut Option { + &mut self.physical_optimizers + } + + /// Returns the current query_planner value + pub fn query_planner(&mut self) -> &mut Option> { + &mut self.query_planner + } + + /// Returns the current catalog_list value + pub fn catalog_list(&mut self) -> &mut Option> { + &mut self.catalog_list + } + + /// Returns the current table_functions value + pub fn table_functions( + &mut self, + ) -> &mut Option>> { + &mut self.table_functions + } + + /// Returns the current scalar_functions value + pub fn scalar_functions(&mut self) -> &mut Option>> { + &mut self.scalar_functions + } + + /// Returns the current aggregate_functions value + pub fn aggregate_functions(&mut self) -> &mut Option>> { + &mut self.aggregate_functions + } + + /// Returns the current window_functions value + pub fn window_functions(&mut self) -> &mut Option>> { + &mut self.window_functions + } + + /// Returns the current serializer_registry value + pub fn serializer_registry(&mut self) -> &mut Option> { + &mut self.serializer_registry + } + + /// Returns the current file_formats value + pub fn file_formats(&mut self) -> &mut Option>> { + &mut self.file_formats + } + + /// Returns the current session_config value + pub fn config(&mut self) -> &mut Option { + &mut self.config + } + + /// Returns the current table_options value + pub fn table_options(&mut self) -> &mut Option { + &mut self.table_options + } + + /// Returns the current execution_props value + pub fn execution_props(&mut self) -> &mut Option { + &mut self.execution_props + } + + /// Returns the current table_factories value + pub fn table_factories( + &mut self, + ) -> &mut Option>> { + &mut self.table_factories + } + + /// Returns the current runtime_env value + pub fn runtime_env(&mut self) -> &mut Option> { + &mut self.runtime_env + } + + /// Returns the 
current function_factory value + pub fn function_factory(&mut self) -> &mut Option> { + &mut self.function_factory + } + + /// Returns the current analyzer_rules value + pub fn analyzer_rules( + &mut self, + ) -> &mut Option>> { + &mut self.analyzer_rules + } + + /// Returns the current optimizer_rules value + pub fn optimizer_rules( + &mut self, + ) -> &mut Option>> { + &mut self.optimizer_rules + } + + /// Returns the current physical_optimizer_rules value + pub fn physical_optimizer_rules( + &mut self, + ) -> &mut Option>> { + &mut self.physical_optimizer_rules + } +} + +impl Default for SessionStateBuilder { + fn default() -> Self { + Self::new() + } +} + +impl From for SessionStateBuilder { + fn from(state: SessionState) -> Self { + SessionStateBuilder::new_from_existing(state) + } +} + +/// Defaults that are used as part of creating a SessionState such as table providers, +/// file formats, registering of builtin functions, etc. +pub struct SessionStateDefaults {} + +impl SessionStateDefaults { + /// returns a map of the default [`TableProviderFactory`]s + pub fn default_table_factories() -> HashMap> { + let mut table_factories: HashMap> = + HashMap::new(); + #[cfg(feature = "parquet")] + table_factories.insert("PARQUET".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("CSV".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("JSON".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("NDJSON".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("AVRO".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("ARROW".into(), Arc::new(DefaultTableFactory::new())); + + table_factories + } + + /// returns the default MemoryCatalogProvider + pub fn default_catalog( + config: &SessionConfig, + table_factories: &HashMap>, + runtime: &Arc, + ) -> MemoryCatalogProvider { + let default_catalog = MemoryCatalogProvider::new(); + + default_catalog + .register_schema( + &config.options().catalog.default_schema, + Arc::new(MemorySchemaProvider::new()), + ) + .expect("memory catalog provider can register schema"); + + Self::register_default_schema(config, table_factories, runtime, &default_catalog); + + default_catalog + } + + /// returns the list of default [`ExprPlanner`]s + pub fn default_expr_planners() -> Vec> { + let expr_planners: Vec> = vec![ + Arc::new(functions::core::planner::CoreFunctionPlanner::default()), + // register crate of array expressions (if enabled) + #[cfg(feature = "array_expressions")] + Arc::new(functions_array::planner::ArrayFunctionPlanner), + #[cfg(feature = "array_expressions")] + Arc::new(functions_array::planner::FieldAccessPlanner), + #[cfg(any( + feature = "datetime_expressions", + feature = "unicode_expressions" + ))] + Arc::new(functions::planner::UserDefinedFunctionPlanner), + ]; + + expr_planners + } + + /// returns the list of default [`ScalarUDF']'s + pub fn default_scalar_functions() -> Vec> { + let mut functions: Vec> = functions::all_default_functions(); + #[cfg(feature = "array_expressions")] + functions.append(&mut functions_array::all_default_array_functions()); + + functions + } + + /// returns the list of default [`AggregateUDF']'s + pub fn default_aggregate_functions() -> Vec> { + functions_aggregate::all_default_aggregate_functions() + } + + /// returns the list of default [`FileFormatFactory']'s + pub fn default_file_formats() -> Vec> { + let file_formats: Vec> = vec![ + #[cfg(feature = "parquet")] + Arc::new(ParquetFormatFactory::new()), + 
Arc::new(JsonFormatFactory::new()), + Arc::new(CsvFormatFactory::new()), + Arc::new(ArrowFormatFactory::new()), + Arc::new(AvroFormatFactory::new()), + ]; + + file_formats + } + + /// registers all builtin functions - scalar, array and aggregate + pub fn register_builtin_functions(state: &mut SessionState) { + Self::register_scalar_functions(state); + Self::register_array_functions(state); + Self::register_aggregate_functions(state); + } + + /// registers all the builtin scalar functions + pub fn register_scalar_functions(state: &mut SessionState) { + functions::register_all(state).expect("can not register built in functions"); + } + + /// registers all the builtin array functions + pub fn register_array_functions(state: &mut SessionState) { + // register crate of array expressions (if enabled) + #[cfg(feature = "array_expressions")] + functions_array::register_all(state).expect("can not register array expressions"); + } + + /// registers all the builtin aggregate functions + pub fn register_aggregate_functions(state: &mut SessionState) { + functions_aggregate::register_all(state) + .expect("can not register aggregate functions"); + } + + /// registers the default schema + pub fn register_default_schema( + config: &SessionConfig, + table_factories: &HashMap>, + runtime: &Arc, + default_catalog: &MemoryCatalogProvider, + ) { + let url = config.options().catalog.location.as_ref(); + let format = config.options().catalog.format.as_ref(); + let (url, format) = match (url, format) { + (Some(url), Some(format)) => (url, format), + _ => return, + }; + let url = url.to_string(); + let format = format.to_string(); + + let url = Url::parse(url.as_str()).expect("Invalid default catalog location!"); + let authority = match url.host_str() { + Some(host) => format!("{}://{}", url.scheme(), host), + None => format!("{}://", url.scheme()), + }; + let path = &url.as_str()[authority.len()..]; + let path = object_store::path::Path::parse(path).expect("Can't parse path"); + let store = ObjectStoreUrl::parse(authority.as_str()) + .expect("Invalid default catalog url"); + let store = match runtime.object_store(store) { + Ok(store) => store, + _ => return, + }; + let factory = match table_factories.get(format.as_str()) { + Some(factory) => factory, + _ => return, + }; + let schema = + ListingSchemaProvider::new(authority, path, factory.clone(), store, format); + let _ = default_catalog + .register_schema("default", Arc::new(schema)) + .expect("Failed to register default schema"); + } + + /// registers the default [`FileFormatFactory`]s + pub fn register_default_file_formats(state: &mut SessionState) { + let formats = SessionStateDefaults::default_file_formats(); + for format in formats { + if let Err(e) = state.register_file_format(format, false) { + log::info!("Unable to register default file format: {e}") + }; + } + } +} + struct SessionContextProvider<'a> { state: &'a SessionState, tables: HashMap>, diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index d2bc334ec3248..efc83d8f6b5c2 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2269,6 +2269,7 @@ mod tests { use crate::prelude::{SessionConfig, SessionContext}; use crate::test_util::{scan_empty, scan_empty_with_partitions}; + use crate::execution::session_state::SessionStateBuilder; use arrow::array::{ArrayRef, DictionaryArray, Int32Array}; use arrow::datatypes::{DataType, Field, Int32Type}; use datafusion_common::{assert_contains, DFSchemaRef, TableReference}; 
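The test and example changes that follow all migrate from the old `SessionState::new_with_config_rt(config, runtime)` constructor to the builder introduced above. As a quick orientation (a minimal sketch, not part of the patch, assuming the `SessionStateBuilder` API added in this commit), the new pattern looks roughly like this:

    use std::sync::Arc;
    use datafusion::execution::runtime_env::RuntimeEnv;
    use datafusion::execution::session_state::SessionStateBuilder;
    use datafusion::prelude::{SessionConfig, SessionContext};

    fn example_context() -> SessionContext {
        // Build a SessionState from a config and runtime, opting into the
        // default catalog, functions and file formats via with_default_features().
        let state = SessionStateBuilder::new()
            .with_config(SessionConfig::new().with_target_partitions(4))
            .with_runtime_env(Arc::new(RuntimeEnv::default()))
            .with_default_features()
            .build();
        // Wrap the state in a SessionContext, as the migrated tests below do.
        SessionContext::new_with_state(state)
    }

Custom planner components slot into the same chain, e.g. `.with_query_planner(..)`, `.with_optimizer_rule(..)`, or `.with_analyzer_rule(..)`, as the `user_defined_plan.rs` hunk further down shows.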
@@ -2282,7 +2283,11 @@ mod tests { let runtime = Arc::new(RuntimeEnv::default()); let config = SessionConfig::new().with_target_partitions(4); let config = config.set_bool("datafusion.optimizer.skip_failed_rules", false); - SessionState::new_with_config_rt(config, runtime) + SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build() } async fn plan(logical_plan: &LogicalPlan) -> Result> { diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index bea6f7b9ceb7b..6c0a2fc7bec47 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -16,9 +16,8 @@ // under the License. //! Object store implementation used for testing use crate::execution::context::SessionState; +use crate::execution::session_state::SessionStateBuilder; use crate::prelude::SessionContext; -use datafusion_execution::config::SessionConfig; -use datafusion_execution::runtime_env::RuntimeEnv; use futures::FutureExt; use object_store::{memory::InMemory, path::Path, ObjectMeta, ObjectStore}; use std::sync::Arc; @@ -44,10 +43,7 @@ pub fn make_test_store_and_state(files: &[(&str, u64)]) -> (Arc, Sessi ( Arc::new(memory), - SessionState::new_with_config_rt( - SessionConfig::default(), - Arc::new(RuntimeEnv::default()), - ), + SessionStateBuilder::new().with_default_features().build(), ) } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index f1d57c44293be..1b2a6770cf013 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -42,7 +42,8 @@ use url::Url; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::datasource::MemTable; use datafusion::error::Result; -use datafusion::execution::context::{SessionContext, SessionState}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::prelude::JoinType; use datafusion::prelude::{CsvReadOptions, ParquetReadOptions}; use datafusion::test_util::{parquet_test_data, populate_csv_partitions}; @@ -1544,7 +1545,11 @@ async fn unnest_non_nullable_list() -> Result<()> { async fn test_read_batches() -> Result<()> { let config = SessionConfig::new(); let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::new_with_config_rt(config, runtime); + let state = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build(); let ctx = SessionContext::new_with_state(state); let schema = Arc::new(Schema::new(vec![ @@ -1594,7 +1599,11 @@ async fn test_read_batches() -> Result<()> { async fn test_read_batches_empty() -> Result<()> { let config = SessionConfig::new(); let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::new_with_config_rt(config, runtime); + let state = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build(); let ctx = SessionContext::new_with_state(state); let batches = vec![]; @@ -1608,9 +1617,7 @@ async fn test_read_batches_empty() -> Result<()> { #[tokio::test] async fn consecutive_projection_same_schema() -> Result<()> { - let config = SessionConfig::new(); - let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::new_with_config_rt(config, runtime); + let state = SessionStateBuilder::new().with_default_features().build(); let ctx = SessionContext::new_with_state(state); let schema = 
Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 7ef24609e238d..1d151f9fd3683 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -38,6 +38,7 @@ use datafusion::datasource::{MemTable, TableProvider}; use datafusion::execution::context::SessionState; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::physical_optimizer::join_selection::JoinSelection; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; @@ -459,13 +460,16 @@ impl TestCase { let runtime = RuntimeEnv::new(rt_config).unwrap(); // Configure execution - let state = SessionState::new_with_config_rt(config, Arc::new(runtime)); - let state = match scenario.rules() { - Some(rules) => state.with_physical_optimizer_rules(rules), - None => state, + let builder = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(Arc::new(runtime)) + .with_default_features(); + let builder = match scenario.rules() { + Some(rules) => builder.with_physical_optimizer_rules(rules), + None => builder, }; - let ctx = SessionContext::new_with_state(state); + let ctx = SessionContext::new_with_state(builder.build()); ctx.register_table("t", table).expect("registering table"); let query = query.expect("Test error: query not specified"); diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 9f94a59a3e598..bf25b36f48e8b 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -35,6 +35,7 @@ use datafusion_execution::cache::cache_unit::{ use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::session_state::SessionStateBuilder; use tempfile::tempdir; #[tokio::test] @@ -167,10 +168,7 @@ async fn get_listing_table( ) -> ListingTable { let schema = opt .infer_schema( - &SessionState::new_with_config_rt( - SessionConfig::default(), - Arc::new(RuntimeEnv::default()), - ), + &SessionStateBuilder::new().with_default_features().build(), table_path, ) .await diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs index 2174009b85573..83712053b9542 100644 --- a/datafusion/core/tests/sql/create_drop.rs +++ b/datafusion/core/tests/sql/create_drop.rs @@ -15,18 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-use datafusion::execution::context::SessionState; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::test_util::TestTableFactory; use super::*; #[tokio::test] async fn create_custom_table() -> Result<()> { - let cfg = RuntimeConfig::new(); - let env = RuntimeEnv::new(cfg).unwrap(); - let ses = SessionConfig::new(); - let mut state = SessionState::new_with_config_rt(ses, Arc::new(env)); + let mut state = SessionStateBuilder::new().with_default_features().build(); state .table_factories_mut() .insert("DELTATABLE".to_string(), Arc::new(TestTableFactory {})); @@ -45,10 +41,7 @@ async fn create_custom_table() -> Result<()> { #[tokio::test] async fn create_external_table_with_ddl() -> Result<()> { - let cfg = RuntimeConfig::new(); - let env = RuntimeEnv::new(cfg).unwrap(); - let ses = SessionConfig::new(); - let mut state = SessionState::new_with_config_rt(ses, Arc::new(env)); + let mut state = SessionStateBuilder::new().with_default_features().build(); state .table_factories_mut() .insert("MOCKTABLE".to_string(), Arc::new(TestTableFactory {})); diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 38ed142cf922f..a44f522ba95ac 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -92,6 +92,7 @@ use datafusion::{ }; use async_trait::async_trait; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::ScalarValue; @@ -290,10 +291,14 @@ async fn topk_plan() -> Result<()> { fn make_topk_context() -> SessionContext { let config = SessionConfig::new().with_target_partitions(48); let runtime = Arc::new(RuntimeEnv::default()); - let mut state = SessionState::new_with_config_rt(config, runtime) + let state = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() .with_query_planner(Arc::new(TopKQueryPlanner {})) - .add_optimizer_rule(Arc::new(TopKOptimizerRule {})); - state.add_analyzer_rule(Arc::new(MyAnalyzerRule {})); + .with_optimizer_rule(Arc::new(TopKOptimizerRule {})) + .with_analyzer_rule(Arc::new(MyAnalyzerRule {})) + .build(); SessionContext::new_with_state(state) } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index f764a050a6cdd..d0209d811b7ce 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -39,8 +39,7 @@ use prost::Message; use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionState; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::count::count_udaf; use datafusion::functions_aggregate::expr_fn::{ @@ -202,10 +201,7 @@ async fn roundtrip_custom_tables() -> Result<()> { let mut table_factories: HashMap> = HashMap::new(); table_factories.insert("TESTTABLE".to_string(), Arc::new(TestTableFactory {})); - let cfg = RuntimeConfig::new(); - let env = RuntimeEnv::new(cfg).unwrap(); - let ses = SessionConfig::new(); - let mut 
state = SessionState::new_with_config_rt(ses, Arc::new(env)); + let mut state = SessionStateBuilder::new().with_default_features().build(); // replace factories *state.table_factories_mut() = table_factories; let ctx = SessionContext::new_with_state(state); diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 2893b1a31a26c..5b2d0fbacaef0 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -28,7 +28,6 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}; use datafusion::common::{not_impl_err, plan_err, DFSchema, DFSchemaRef}; use datafusion::error::Result; -use datafusion::execution::context::SessionState; use datafusion::execution::registry::SerializerRegistry; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::logical_expr::{ @@ -37,6 +36,7 @@ use datafusion::logical_expr::{ use datafusion::optimizer::simplify_expressions::expr_simplifier::THRESHOLD_INLINE_INLIST; use datafusion::prelude::*; +use datafusion::execution::session_state::SessionStateBuilder; use substrait::proto::extensions::simple_extension_declaration::MappingType; use substrait::proto::rel::RelType; use substrait::proto::{plan_rel, Plan, Rel}; @@ -1121,11 +1121,12 @@ async fn function_extension_info(sql: &str) -> Result<(Vec, Vec)> { } async fn create_context() -> Result { - let mut state = SessionState::new_with_config_rt( - SessionConfig::default(), - Arc::new(RuntimeEnv::default()), - ) - .with_serializer_registry(Arc::new(MockSerializerRegistry)); + let mut state = SessionStateBuilder::new() + .with_config(SessionConfig::default()) + .with_runtime_env(Arc::new(RuntimeEnv::default())) + .with_default_features() + .with_serializer_registry(Arc::new(MockSerializerRegistry)) + .build(); // register udaf for test, e.g. 
`sum()` datafusion_functions_aggregate::register_all(&mut state) From bfd815622f1fe2c84d6fab32596b83ffbe52a84a Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Sun, 14 Jul 2024 12:06:14 -0700 Subject: [PATCH 051/357] integrate consumer tests, implement tpch query 18 to 22 (#11462) --- .../tests/cases/consumer_integration.rs | 191 ++ .../tpch_substrait_plans/query_18.json | 1128 ++++++++ .../tpch_substrait_plans/query_19.json | 2386 +++++++++++++++++ .../tpch_substrait_plans/query_20.json | 1273 +++++++++ .../tpch_substrait_plans/query_21.json | 1493 +++++++++++ .../tpch_substrait_plans/query_22.json | 2034 ++++++++++++++ 6 files changed, 8505 insertions(+) create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_18.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_19.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_20.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_21.json create mode 100644 datafusion/substrait/tests/testdata/tpch_substrait_plans/query_22.json diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index c8130220ef4ae..8fbcd721166e3 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -398,4 +398,195 @@ mod tests { \n TableScan: FILENAME_PLACEHOLDER_1 projection=[p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment]"); Ok(()) } + /// this test has some problem in json file internally, gonna fix it + #[ignore] + #[tokio::test] + async fn tpch_test_17() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/part.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_17.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let _plan = from_substrait_plan(&ctx, &proto).await?; + Ok(()) + } + + #[tokio::test] + async fn tpch_test_18() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_18.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.c_name AS C_NAME, FILENAME_PLACEHOLDER_0.c_custkey AS C_CUSTKEY, FILENAME_PLACEHOLDER_1.o_orderkey AS O_ORDERKEY, FILENAME_PLACEHOLDER_1.o_orderdate AS O_ORDERDATE, FILENAME_PLACEHOLDER_1.o_totalprice AS O_TOTALPRICE, sum(FILENAME_PLACEHOLDER_2.l_quantity) AS EXPR$5\ + \n Limit: skip=0, fetch=100\ + \n Sort: FILENAME_PLACEHOLDER_1.o_totalprice DESC NULLS FIRST, FILENAME_PLACEHOLDER_1.o_orderdate ASC NULLS LAST\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.c_name, 
FILENAME_PLACEHOLDER_0.c_custkey, FILENAME_PLACEHOLDER_1.o_orderkey, FILENAME_PLACEHOLDER_1.o_orderdate, FILENAME_PLACEHOLDER_1.o_totalprice]], aggr=[[sum(FILENAME_PLACEHOLDER_2.l_quantity)]]\ + \n Projection: FILENAME_PLACEHOLDER_0.c_name, FILENAME_PLACEHOLDER_0.c_custkey, FILENAME_PLACEHOLDER_1.o_orderkey, FILENAME_PLACEHOLDER_1.o_orderdate, FILENAME_PLACEHOLDER_1.o_totalprice, FILENAME_PLACEHOLDER_2.l_quantity\ + \n Filter: CAST(FILENAME_PLACEHOLDER_1.o_orderkey IN () AS Boolean) AND FILENAME_PLACEHOLDER_0.c_custkey = FILENAME_PLACEHOLDER_1.o_custkey AND FILENAME_PLACEHOLDER_1.o_orderkey = FILENAME_PLACEHOLDER_2.l_orderkey\ + \n Subquery:\ + \n Projection: FILENAME_PLACEHOLDER_3.l_orderkey\ + \n Filter: sum(FILENAME_PLACEHOLDER_3.l_quantity) > CAST(Int32(300) AS Decimal128(19, 0))\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_3.l_orderkey]], aggr=[[sum(FILENAME_PLACEHOLDER_3.l_quantity)]]\ + \n Projection: FILENAME_PLACEHOLDER_3.l_orderkey, FILENAME_PLACEHOLDER_3.l_quantity\ + \n TableScan: FILENAME_PLACEHOLDER_3 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]"); + Ok(()) + } + #[tokio::test] + async fn tpch_test_19() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/part.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_19.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS REVENUE]]\n Projection: FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ + \n Filter: FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#12\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM CASE\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM PACK\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM PKG\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(1) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(1) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(5) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = 
CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8) OR FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#23\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED BAG\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED PKG\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED PACK\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(10) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(10) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8) OR FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#34\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG CASE\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG PACK\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG PKG\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(20) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(20) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(15) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment]"); + Ok(()) + } + + #[tokio::test] + async fn tpch_test_20() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/partsupp.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/part.csv"), + ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_20.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.s_name AS S_NAME, FILENAME_PLACEHOLDER_0.s_address AS S_ADDRESS\ + \n Sort: FILENAME_PLACEHOLDER_0.s_name ASC NULLS LAST\ + \n Projection: FILENAME_PLACEHOLDER_0.s_name, FILENAME_PLACEHOLDER_0.s_address\ + \n Filter: CAST(FILENAME_PLACEHOLDER_0.s_suppkey IN () AS Boolean) AND FILENAME_PLACEHOLDER_0.s_nationkey = FILENAME_PLACEHOLDER_1.n_nationkey AND FILENAME_PLACEHOLDER_1.n_name = CAST(Utf8(\"CANADA\") AS Utf8)\ + \n Subquery:\ + \n Projection: FILENAME_PLACEHOLDER_2.ps_suppkey\ + \n Filter: CAST(FILENAME_PLACEHOLDER_2.ps_partkey IN () AS Boolean) AND 
CAST(FILENAME_PLACEHOLDER_2.ps_availqty AS Decimal128(19, 1)) > ()\ + \n Subquery:\ + \n Projection: FILENAME_PLACEHOLDER_3.p_partkey\ + \n Filter: FILENAME_PLACEHOLDER_3.p_name LIKE CAST(Utf8(\"forest%\") AS Utf8)\ + \n TableScan: FILENAME_PLACEHOLDER_3 projection=[p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment]\ + \n Subquery:\ + \n Projection: Decimal128(Some(5),2,1) * sum(FILENAME_PLACEHOLDER_4.l_quantity)\ + \n Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_4.l_quantity)]]\ + \n Projection: FILENAME_PLACEHOLDER_4.l_quantity\ + \n Filter: FILENAME_PLACEHOLDER_4.l_partkey = FILENAME_PLACEHOLDER_4.l_orderkey AND FILENAME_PLACEHOLDER_4.l_suppkey = FILENAME_PLACEHOLDER_4.l_partkey AND FILENAME_PLACEHOLDER_4.l_shipdate >= CAST(Utf8(\"1994-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_4.l_shipdate < CAST(Utf8(\"1995-01-01\") AS Date32)\ + \n TableScan: FILENAME_PLACEHOLDER_4 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment]\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[n_nationkey, n_name, n_regionkey, n_comment]"); + Ok(()) + } + + #[tokio::test] + async fn tpch_test_21() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/supplier.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/orders.csv"), + ("FILENAME_PLACEHOLDER_3", "tests/testdata/tpch/nation.csv"), + ("FILENAME_PLACEHOLDER_4", "tests/testdata/tpch/lineitem.csv"), + ("FILENAME_PLACEHOLDER_5", "tests/testdata/tpch/lineitem.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_21.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.s_name AS S_NAME, count(Int64(1)) AS NUMWAIT\ + \n Limit: skip=0, fetch=100\ + \n Sort: count(Int64(1)) DESC NULLS FIRST, FILENAME_PLACEHOLDER_0.s_name ASC NULLS LAST\ + \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.s_name]], aggr=[[count(Int64(1))]]\ + \n Projection: FILENAME_PLACEHOLDER_0.s_name\ + \n Filter: FILENAME_PLACEHOLDER_0.s_suppkey = FILENAME_PLACEHOLDER_1.l_suppkey AND FILENAME_PLACEHOLDER_2.o_orderkey = FILENAME_PLACEHOLDER_1.l_orderkey AND FILENAME_PLACEHOLDER_2.o_orderstatus = Utf8(\"F\") AND FILENAME_PLACEHOLDER_1.l_receiptdate > FILENAME_PLACEHOLDER_1.l_commitdate AND EXISTS () AND NOT EXISTS () AND FILENAME_PLACEHOLDER_0.s_nationkey = FILENAME_PLACEHOLDER_3.n_nationkey AND FILENAME_PLACEHOLDER_3.n_name = CAST(Utf8(\"SAUDI ARABIA\") AS Utf8)\ + \n Subquery:\ + \n Filter: FILENAME_PLACEHOLDER_4.l_orderkey = FILENAME_PLACEHOLDER_4.l_tax AND FILENAME_PLACEHOLDER_4.l_suppkey != FILENAME_PLACEHOLDER_4.l_linestatus\ + \n TableScan: FILENAME_PLACEHOLDER_4 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, 
l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n Subquery:\ + \n Filter: FILENAME_PLACEHOLDER_5.l_orderkey = FILENAME_PLACEHOLDER_5.l_tax AND FILENAME_PLACEHOLDER_5.l_suppkey != FILENAME_PLACEHOLDER_5.l_linestatus AND FILENAME_PLACEHOLDER_5.l_receiptdate > FILENAME_PLACEHOLDER_5.l_commitdate\ + \n TableScan: FILENAME_PLACEHOLDER_5 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n Inner Join: Filter: Boolean(true)\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment]\n TableScan: FILENAME_PLACEHOLDER_2 projection=[o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_3 projection=[n_nationkey, n_name, n_regionkey, n_comment]"); + Ok(()) + } + + #[tokio::test] + async fn tpch_test_22() -> Result<()> { + let ctx = create_context(vec![ + ("FILENAME_PLACEHOLDER_0", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_1", "tests/testdata/tpch/customer.csv"), + ("FILENAME_PLACEHOLDER_2", "tests/testdata/tpch/orders.csv"), + ]) + .await?; + let path = "tests/testdata/tpch_substrait_plans/query_22.json"; + let proto = serde_json::from_reader::<_, Plan>(BufReader::new( + File::open(path).expect("file not found"), + )) + .expect("failed to parse json"); + + let plan = from_substrait_plan(&ctx, &proto).await?; + let plan_str = format!("{:?}", plan); + assert_eq!(plan_str, "Projection: substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(FILENAME_PLACEHOLDER_0.c_acctbal) AS TOTACCTBAL\n Sort: substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2)) ASC NULLS LAST\ + \n Aggregate: groupBy=[[substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(FILENAME_PLACEHOLDER_0.c_acctbal)]]\ + \n Projection: substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)), FILENAME_PLACEHOLDER_0.c_acctbal\ + \n Filter: (substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8)) AND FILENAME_PLACEHOLDER_0.c_acctbal > () AND NOT EXISTS ()\ + \n Subquery:\ + \n Aggregate: groupBy=[[]], aggr=[[avg(FILENAME_PLACEHOLDER_1.c_acctbal)]]\ + \n Projection: FILENAME_PLACEHOLDER_1.c_acctbal\ + \n Filter: FILENAME_PLACEHOLDER_1.c_acctbal > Decimal128(Some(0),3,2) AND (substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR 
substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(FILENAME_PLACEHOLDER_1.c_phone, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8))\ + \n TableScan: FILENAME_PLACEHOLDER_1 projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment]\n Subquery:\ + \n Filter: FILENAME_PLACEHOLDER_2.o_custkey = FILENAME_PLACEHOLDER_2.o_orderkey\ + \n TableScan: FILENAME_PLACEHOLDER_2 projection=[o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment]\ + \n TableScan: FILENAME_PLACEHOLDER_0 projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment]"); + Ok(()) + } } diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_18.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_18.json new file mode 100644 index 0000000000000..a4f0b25db9562 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_18.json @@ -0,0 +1,1128 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "sum:opt_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "equal:any1_any1" + } + } + ], + "relations": [ + { + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 33, + 34, + 35, + 36, + 37, + 38 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "C_CUSTKEY", + "C_NAME", + "C_ADDRESS", + "C_NATIONKEY", + "C_PHONE", + "C_ACCTBAL", + "C_MKTSEGMENT", + "C_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "O_ORDERKEY", + "O_CUSTKEY", + "O_ORDERSTATUS", + "O_TOTALPRICE", + "O_ORDERDATE", + "O_ORDERPRIORITY", + "O_CLERK", + "O_SHIPPRIORITY", + "O_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + 
"fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [ + { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + ], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 16, + 17 + ] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_3", + "parquet": {} + } + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 1, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "condition": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 300, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + 
"structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 11 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 1, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + } + ] + } + }, + "offset": "0", + "count": "100" + } + }, + "names": [ + "C_NAME", + "C_CUSTKEY", + "O_ORDERKEY", + "O_ORDERDATE", + "O_TOTALPRICE", + "EXPR$5" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_19.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_19.json new file mode 100644 index 0000000000000..356111a480f3b --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_19.json @@ -0,0 +1,2386 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "or:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "and:bool" + } + }, + { + "extensionFunction": { + 
"extensionUriReference": 2, + "functionAnchor": 2, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "gte:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "lte:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "add:opt_i32_i32" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "multiply:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "subtract:opt_decimal_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 8, + "name": "sum:opt_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 25 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + 
"common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "P_PARTKEY", + "P_NAME", + "P_MFGR", + "P_BRAND", + "P_TYPE", + "P_SIZE", + "P_CONTAINER", + "P_RETAILPRICE", + "P_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#12", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "SM CASE", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "SM BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "SM PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "SM PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + 
"functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 5, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 
2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#23", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "MED BAG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "MED BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "MED PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "MED PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + 
"outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, 
+ "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#34", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "LG CASE", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "LG BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "LG PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + 
"field": 22 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "LG PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 20, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "literal": { + "i32": 20, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 15, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + 
"value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [] + } + ], + "measures": [ + { + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "names": [ + "REVENUE" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_20.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_20.json new file mode 100644 index 0000000000000..54a71fa553f89 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_20.json @@ -0,0 +1,1273 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 5, + "uri": 
"/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_datetime.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "like:vchar_vchar" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "gte:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "lt:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 6, + "name": "sum:opt_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "multiply:opt_decimal_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "sort": { + "common": { + "direct": {} + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 11, + 12 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "join": { + "common": { + "direct": {} + }, + "left": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "S_SUPPKEY", + "S_NAME", + "S_ADDRESS", + "S_NATIONKEY", + "S_PHONE", + "S_ACCTBAL", + "S_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "N_NATIONKEY", + "N_NAME", + "N_REGIONKEY", + "N_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + 
"nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + ], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 5 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "PS_PARTKEY", + "PS_SUPPKEY", + "PS_AVAILQTY", + "PS_SUPPLYCOST", + "PS_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + ], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 9 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "P_PARTKEY", + "P_NAME", + "P_MFGR", + "P_BRAND", + "P_TYPE", + "P_SIZE", + "P_CONTAINER", + "P_RETAILPRICE", + "P_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + 
"varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_3", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "forest%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + ] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 1, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": {} + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "subquery": { + "scalar": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": {} + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 16 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + 
} + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_4", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1994-01-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "1995-01-01", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + 
"expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": {} + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [] + } + ], + "measures": [ + { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 1, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "literal": { + "decimal": { + "value": "BQAAAAAAAAAAAAAAAAAAAA==", + "precision": 2, + "scale": 1 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + } + } + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + ] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": {} + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "CANADA", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": {} + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": {} + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + } + ] + } + }, + "names": [ + "S_NAME", + "S_ADDRESS" + ] + } + } + ], + "expectedTypeUrls": [] +} \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_21.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_21.json new file mode 100644 index 
0000000000000..d35c1517228bc --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_21.json @@ -0,0 +1,1493 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:date_date" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "not_equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 4, + "name": "not:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "count:opt" + } + } + ], + "relations": [ + { + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 36 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "S_SUPPKEY", + "S_NAME", + "S_ADDRESS", + "S_NATIONKEY", + "S_PHONE", + "S_ACCTBAL", + "S_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + 
"i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "O_ORDERKEY", + "O_CUSTKEY", + "O_ORDERSTATUS", + "O_TOTALPRICE", + "O_ORDERDATE", + "O_ORDERPRIORITY", + "O_CLERK", + "O_SHIPPRIORITY", + "O_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": 
{ + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "N_NATIONKEY", + "N_NAME", + "N_REGIONKEY", + "N_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_3", + "parquet": {} + } + ] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false, + "typeVariationReference": 0 + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 25 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "fixedChar": "F", + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 18 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + 
"L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_4", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + } + ] + } + } + } + } + } + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + 
"arguments": [ + { + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "L_ORDERKEY", + "L_PARTKEY", + "L_SUPPKEY", + "L_LINENUMBER", + "L_QUANTITY", + "L_EXTENDEDPRICE", + "L_DISCOUNT", + "L_TAX", + "L_RETURNFLAG", + "L_LINESTATUS", + "L_SHIPDATE", + "L_COMMITDATE", + "L_RECEIPTDATE", + "L_SHIPINSTRUCT", + "L_SHIPMODE", + "L_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_5", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + 
"field": 2 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 11 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + } + ] + } + } + } + } + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 32 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "SAUDI ARABIA", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [] + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + } + ] + } + }, + "offset": "0", + "count": "100" + } + }, + "names": [ + "S_NAME", + "NUMWAIT" + ] + } + } + ], + "expectedTypeUrls": [] +} diff --git a/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_22.json b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_22.json new file mode 100644 index 0000000000000..9eb37da8e18e8 --- /dev/null +++ b/datafusion/substrait/tests/testdata/tpch_substrait_plans/query_22.json @@ -0,0 +1,2034 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 5, + "uri": 
"/functions_aggregate_generic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "or:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "equal:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "substring:fchar_i32_i32" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "gt:any1_any1" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "avg:opt_decimal" + } + }, + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 6, + "name": "not:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "count:opt" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 8, + "name": "sum:opt_decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 8, + 9 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "C_CUSTKEY", + "C_NAME", + "C_ADDRESS", + "C_NATIONKEY", + "C_PHONE", + "C_ACCTBAL", + "C_MKTSEGMENT", + "C_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_0", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + 
"functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "13", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "31", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "23", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "29", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "30", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "18", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + 
"outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "17", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "subquery": { + "scalar": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 8 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "C_CUSTKEY", + "C_NAME", + "C_ADDRESS", + "C_NATIONKEY", + "C_PHONE", + "C_ACCTBAL", + "C_MKTSEGMENT", + "C_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_1", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "decimal": { + "value": "AAAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + 
} + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "13", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "31", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "23", + "nullable": false, + 
"typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "29", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "30", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "18", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + }, + { + 
"value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + } + }, + { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "17", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + } + ] + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [] + } + ], + "measures": [ + { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + } + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "O_ORDERKEY", + "O_CUSTKEY", + "O_ORDERSTATUS", + "O_TOTALPRICE", + "O_ORDERDATE", + "O_ORDERPRIORITY", + "O_CLERK", + "O_SHIPPRIORITY", + "O_COMMENT" + ], + "struct": { + "types": [ + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "local_files": { + "items": [ + { + "uri_file": "file://FILENAME_PLACEHOLDER_2", + "parquet": {} + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + } + ] + } + } + } + } + } + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, + { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, + { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + } + ] + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + ] + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [] + } + }, + { + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + } + ] + } + } + ] + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + } + ] + } + }, + "names": [ + "CNTRYCODE", + "NUMCUST", + "TOTACCTBAL" + ] + } + } + ], + "expectedTypeUrls": [] +} From d01301d2ee9ea6d8e22e002bdfb5cf7b6ff6bd75 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 16 Jul 2024 01:55:49 +0800 Subject: [PATCH 052/357] Docs: Explain the usage of logical expressions for `create_aggregate_expr` (#11458) * doc: comment Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * fix Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/physical-expr-common/src/aggregate/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 35666f199ace9..db4581a622acc 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ 
-43,6 +43,14 @@ use datafusion_expr::utils::AggregateOrderSensitivity; /// Creates a physical expression of the UDAF, that includes all necessary type coercion. /// This function errors when `args`' can't be coerced to a valid argument type of the UDAF. +/// +/// `input_exprs` and `sort_exprs` are used for customizing Accumulator +/// whose behavior depends on arguments such as the `ORDER BY`. +/// +/// For example to call `ARRAY_AGG(x ORDER BY y)` would pass `y` to `sort_exprs`, `x` to `input_exprs` +/// +/// `input_exprs` and `sort_exprs` are used for customizing Accumulator as the arguments in `AccumulatorArgs`, +/// if you don't need them it is fine to pass empty slice `&[]`. #[allow(clippy::too_many_arguments)] pub fn create_aggregate_expr( fun: &AggregateUDF, From 0965455486b7dcbd8c9a5efa8d2370ca5460bb9f Mon Sep 17 00:00:00 2001 From: kamille Date: Tue, 16 Jul 2024 02:06:38 +0800 Subject: [PATCH 053/357] Return scalar result when all inputs are constants in `map` and `make_map` (#11461) * return scalar result when all inputs are constants. * support convert map array to scalar. * disable the const evaluate for Map type before impl its hash calculation. * add tests in map.slt. * improve error return. * fix error. * fix remove unused import. * remove duplicated testcase. * remove inline. --- datafusion/common/src/scalar/mod.rs | 5 +- datafusion/functions/src/core/map.rs | 34 +++++++- .../simplify_expressions/expr_simplifier.rs | 27 +++++- datafusion/sqllogictest/test_files/map.slt | 84 +++++++++++++++++++ 4 files changed, 143 insertions(+), 7 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 6c03e8698e80b..c891e85aa59bb 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2678,7 +2678,10 @@ impl ScalarValue { DataType::Duration(TimeUnit::Nanosecond) => { typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)? } - + DataType::Map(_, _) => { + let a = array.slice(index, 1); + Self::Map(Arc::new(a.as_map().to_owned())) + } other => { return _not_impl_err!( "Can't create a scalar from array of type \"{other:?}\"" diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index 8a8a19d7af52b..6626831c8034f 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -28,7 +28,21 @@ use datafusion_common::{exec_err, internal_err, ScalarValue}; use datafusion_common::{not_impl_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +/// Check if we can evaluate the expr to constant directly. +/// +/// # Example +/// ```sql +/// SELECT make_map('type', 'test') from test +/// ``` +/// We can evaluate the result of `make_map` directly. 
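As a companion to the `create_aggregate_expr` doc comment added above, here is a minimal sketch of which logical expressions would go where for `ARRAY_AGG(x ORDER BY y)`. The real function takes several more parameters (schema, output name, ordering flags), and the helper name plus the `Expr::sort` builder used here are assumptions for illustration, not the exact API surface.

```rust
use datafusion_expr::{col, Expr};

// For ARRAY_AGG(x ORDER BY y):
fn array_agg_argument_exprs() -> (Vec<Expr>, Vec<Expr>) {
    // `x`, the aggregated argument, goes into `input_exprs`.
    let input_exprs = vec![col("x")];
    // `y` (ascending, nulls last), the ordering requirement, goes into `sort_exprs`;
    // an accumulator that ignores ordering can simply receive an empty slice.
    let sort_exprs = vec![col("y").sort(true, false)];
    (input_exprs, sort_exprs)
}
```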
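The constant-folding check described in the doc comment just above boils down to "every argument is already a scalar literal". A self-contained sketch of that test follows, using an illustrative helper name rather than the patch's own function:

```rust
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;

/// True only when no argument is an array drawn from the input batch,
/// so the whole call can be evaluated once and returned as a scalar.
fn all_args_are_scalars(args: &[ColumnarValue]) -> bool {
    args.iter().all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
}

fn main() {
    let constant_args = [
        ColumnarValue::Scalar(ScalarValue::from("POST")),
        ColumnarValue::Scalar(ScalarValue::Int64(Some(41))),
    ];
    // Both inputs are literals, so `make_map('POST', 41)` folds to one scalar map.
    assert!(all_args_are_scalars(&constant_args));
}
```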
+fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { + args.iter() + .all(|arg| matches!(arg, ColumnarValue::Scalar(_))) +} + fn make_map(args: &[ColumnarValue]) -> Result { + let can_evaluate_to_const = can_evaluate_to_const(args); + let (key, value): (Vec<_>, Vec<_>) = args .chunks_exact(2) .map(|chunk| { @@ -58,7 +72,7 @@ fn make_map(args: &[ColumnarValue]) -> Result { Ok(value) => value, Err(e) => return internal_err!("Error concatenating values: {}", e), }; - make_map_batch_internal(key, value) + make_map_batch_internal(key, value, can_evaluate_to_const) } fn make_map_batch(args: &[ColumnarValue]) -> Result { @@ -68,9 +82,12 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { args.len() ); } + + let can_evaluate_to_const = can_evaluate_to_const(args); + let key = get_first_array_ref(&args[0])?; let value = get_first_array_ref(&args[1])?; - make_map_batch_internal(key, value) + make_map_batch_internal(key, value, can_evaluate_to_const) } fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { @@ -85,7 +102,11 @@ fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { } } -fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef) -> Result { +fn make_map_batch_internal( + keys: ArrayRef, + values: ArrayRef, + can_evaluate_to_const: bool, +) -> Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); } @@ -124,8 +145,13 @@ fn make_map_batch_internal(keys: ArrayRef, values: ArrayRef) -> Result ConstEvaluator<'a> { } else { // Non-ListArray match ScalarValue::try_from_array(&a, 0) { - Ok(s) => ConstSimplifyResult::Simplified(s), + Ok(s) => { + // TODO: support the optimization for `Map` type after support impl hash for it + if matches!(&s, ScalarValue::Map(_)) { + ConstSimplifyResult::SimplifyRuntimeError( + DataFusionError::NotImplemented("Const evaluate for Map type is still not supported".to_string()), + expr, + ) + } else { + ConstSimplifyResult::Simplified(s) + } + } Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr), } } } - ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s), + ColumnarValue::Scalar(s) => { + // TODO: support the optimization for `Map` type after support impl hash for it + if matches!(&s, ScalarValue::Map(_)) { + ConstSimplifyResult::SimplifyRuntimeError( + DataFusionError::NotImplemented( + "Const evaluate for Map type is still not supported" + .to_string(), + ), + expr, + ) + } else { + ConstSimplifyResult::Simplified(s) + } + } } } } diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index abf5b2ebbf98e..fb8917a5f4fee 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -212,3 +212,87 @@ SELECT map(column5, column6) FROM t; # {k1:1, k2:2} # {k3: 3} # {k5: 5} + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count from t; +---- +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} +{POST: 41, HEAD: 33, PATCH: 30, OPTION: 29, GET: 27, PUT: 25, DELETE: 24} + +query I +SELECT MAKE_MAP('POST', 41, 'HEAD', 33)['POST'] from t; +---- +41 +41 +41 + +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null) from t; +---- +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } + +query ? 
+SELECT MAKE_MAP('POST', null, 'HEAD', 33, 'PATCH', null) from t; +---- +{POST: , HEAD: 33, PATCH: } +{POST: , HEAD: 33, PATCH: } +{POST: , HEAD: 33, PATCH: } + +query ? +SELECT MAKE_MAP(1, null, 2, 33, 3, null) from t; +---- +{1: , 2: 33, 3: } +{1: , 2: 33, 3: } +{1: , 2: 33, 3: } + +query ? +SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']) from t; +---- +{[1, 2]: [a, b], [3, 4]: [b]} +{[1, 2]: [a, b], [3, 4]: [b]} +{[1, 2]: [a, b], [3, 4]: [b]} + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, 30]) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]) from t; +---- +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } +{POST: 41, HEAD: 33, PATCH: } + +query ? +SELECT MAP([[1,2], [3,4]], ['a', 'b']) from t; +---- +{[1, 2]: a, [3, 4]: b} +{[1, 2]: a, [3, 4]: b} +{[1, 2]: a, [3, 4]: b} + +query ? +SELECT MAP(make_array('POST', 'HEAD', 'PATCH'), make_array(41, 33, 30)) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array(41, 33, 30), 'FixedSizeList(3, Int64)')) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} + +query ? +SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), arrow_cast(make_array(41, 33, 30), 'LargeList(Int64)')) from t; +---- +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} +{POST: 41, HEAD: 33, PATCH: 30} From 7bd0e74aaa7aad3e436f01000fd4f973d5724f50 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Tue, 16 Jul 2024 02:53:37 +0800 Subject: [PATCH 054/357] fix: `regexp_replace` fails when pattern or replacement is a scalar `NULL` (#11459) * fix: gexp_replace fails when pattern or replacement is a scalar NULL * chore --- .../functions/src/regex/regexpreplace.rs | 31 +++++++++++++------ datafusion/sqllogictest/test_files/regexp.slt | 10 ++++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 201eebde22bb9..378b6ced076c3 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -282,22 +282,23 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result fn _regexp_replace_early_abort( input_array: &GenericStringArray, + sz: usize, ) -> Result { // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments - // are actually null, then the result will be an array of the same size but with nulls. + // are actually null, then the result will be an array of the same size as the first argument with all nulls. // // Also acts like an early abort mechanism when the input array is empty. - Ok(new_null_array(input_array.data_type(), input_array.len())) + Ok(new_null_array(input_array.data_type(), sz)) } /// Get the first argument from the given string array. /// /// Note: If the array is empty or the first argument is null, /// then calls the given early abort function. macro_rules! 
fetch_string_arg { - ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident) => {{ + ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ let array = as_generic_string_array::($ARG)?; if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array); + return $EARLY_ABORT(array, $ARRAY_SIZE); } else { array.value(0) } @@ -313,12 +314,24 @@ fn _regexp_replace_static_pattern_replace( args: &[ArrayRef], ) -> Result { let string_array = as_generic_string_array::(&args[0])?; - let pattern = fetch_string_arg!(&args[1], "pattern", T, _regexp_replace_early_abort); - let replacement = - fetch_string_arg!(&args[2], "replacement", T, _regexp_replace_early_abort); + let array_size = string_array.len(); + let pattern = fetch_string_arg!( + &args[1], + "pattern", + T, + _regexp_replace_early_abort, + array_size + ); + let replacement = fetch_string_arg!( + &args[2], + "replacement", + T, + _regexp_replace_early_abort, + array_size + ); let flags = match args.len() { 3 => None, - 4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort)), + 4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort, array_size)), other => { return exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." @@ -351,7 +364,7 @@ fn _regexp_replace_static_pattern_replace( let offsets = string_array.value_offsets(); (offsets[string_array.len()] - offsets[0]) .to_usize() - .unwrap() + .expect("Failed to convert usize") }); let mut new_offsets = BufferBuilder::::new(string_array.len() + 1); new_offsets.append(T::zero()); diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index fed7ac31712ce..f5349fc659f6a 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -309,6 +309,16 @@ SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'x ---- fooxx +query TTT +select + regexp_replace(col, NULL, 'c'), + regexp_replace(col, 'a', NULL), + regexp_replace(col, 'a', 'c', NULL) +from (values ('a'), ('b')) as tbl(col); +---- +NULL NULL NULL +NULL NULL NULL + # multiline string query B SELECT 'foo\nbar\nbaz' ~ 'bar'; From f204869ff55bb3e39cf23fc0a34ebd5021e6773f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Tue, 16 Jul 2024 02:54:10 +0800 Subject: [PATCH 055/357] Enable `clone_on_ref_ptr` clippy lint on functions* (#11468) * Enable clone_on_ref_ptr clippy lint on functions * Remove unnecessary Arc::clone --- .../functions-aggregate/src/correlation.rs | 21 +++++++++++----- .../functions-aggregate/src/first_last.rs | 16 ++++++------- datafusion/functions-aggregate/src/lib.rs | 2 ++ datafusion/functions-array/src/array_has.rs | 4 ++-- datafusion/functions-array/src/concat.rs | 2 +- datafusion/functions-array/src/flatten.rs | 4 ++-- datafusion/functions-array/src/lib.rs | 2 ++ datafusion/functions-array/src/resize.rs | 8 +++---- datafusion/functions-array/src/reverse.rs | 4 ++-- datafusion/functions-array/src/set_ops.rs | 12 +++++----- datafusion/functions-array/src/sort.rs | 2 +- datafusion/functions-array/src/string.rs | 2 +- datafusion/functions-array/src/utils.rs | 14 ++++++----- datafusion/functions/benches/concat.rs | 3 ++- datafusion/functions/benches/regx.rs | 24 ++++++++++++------- datafusion/functions/src/core/getfield.rs | 5 ++-- datafusion/functions/src/core/map.rs | 6 ++--- datafusion/functions/src/core/nvl.rs | 7 +++--- datafusion/functions/src/core/nvl2.rs 
| 3 ++- datafusion/functions/src/core/struct.rs | 17 ++++--------- .../functions/src/datetime/date_part.rs | 2 +- .../functions/src/datetime/to_timestamp.rs | 21 ++++++++-------- datafusion/functions/src/lib.rs | 2 ++ datafusion/functions/src/math/abs.rs | 2 +- datafusion/functions/src/math/log.rs | 2 +- datafusion/functions/src/math/round.rs | 2 +- datafusion/functions/src/math/trunc.rs | 2 +- 27 files changed, 106 insertions(+), 85 deletions(-) diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index 10d5563086154..c2d7a89081d66 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::fmt::Debug; +use std::sync::Arc; use arrow::compute::{and, filter, is_not_null}; use arrow::{ @@ -192,13 +193,21 @@ impl Accumulator for CorrelationAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { let states_c = [ - states[0].clone(), - states[1].clone(), - states[3].clone(), - states[5].clone(), + Arc::clone(&states[0]), + Arc::clone(&states[1]), + Arc::clone(&states[3]), + Arc::clone(&states[5]), + ]; + let states_s1 = [ + Arc::clone(&states[0]), + Arc::clone(&states[1]), + Arc::clone(&states[2]), + ]; + let states_s2 = [ + Arc::clone(&states[0]), + Arc::clone(&states[3]), + Arc::clone(&states[4]), ]; - let states_s1 = [states[0].clone(), states[1].clone(), states[2].clone()]; - let states_s2 = [states[0].clone(), states[3].clone(), states[4].clone()]; self.covar.merge_batch(&states_c)?; self.stddev1.merge_batch(&states_s1)?; diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index dd38e34872643..0e619bacef824 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -247,7 +247,7 @@ impl FirstValueAccumulator { .iter() .zip(self.ordering_req.iter()) .map(|(values, req)| SortColumn { - values: values.clone(), + values: Arc::clone(values), options: Some(req.options), }) .collect::>(); @@ -547,7 +547,7 @@ impl LastValueAccumulator { // Take the reverse ordering requirement. This enables us to // use "fetch = 1" to get the last value. SortColumn { - values: values.clone(), + values: Arc::clone(values), options: Some(!req.options), } }) @@ -676,7 +676,7 @@ fn convert_to_sort_cols( arrs.iter() .zip(sort_exprs.iter()) .map(|(item, sort_expr)| SortColumn { - values: item.clone(), + values: Arc::clone(item), options: Some(sort_expr.options), }) .collect::>() @@ -707,7 +707,7 @@ mod tests { for arr in arrs { // Once first_value is set, accumulator should remember it. // It shouldn't update first_value for each new batch - first_accumulator.update_batch(&[arr.clone()])?; + first_accumulator.update_batch(&[Arc::clone(&arr)])?; // last_value should be updated for each new batch. 
last_accumulator.update_batch(&[arr])?; } @@ -733,12 +733,12 @@ mod tests { let mut first_accumulator = FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - first_accumulator.update_batch(&[arrs[0].clone()])?; + first_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = first_accumulator.state()?; let mut first_accumulator = FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - first_accumulator.update_batch(&[arrs[1].clone()])?; + first_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = first_accumulator.state()?; assert_eq!(state1.len(), state2.len()); @@ -763,12 +763,12 @@ mod tests { let mut last_accumulator = LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - last_accumulator.update_batch(&[arrs[0].clone()])?; + last_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = last_accumulator.state()?; let mut last_accumulator = LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - last_accumulator.update_batch(&[arrs[1].clone()])?; + last_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = last_accumulator.state()?; assert_eq!(state1.len(), state2.len()); diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 6ae2dfb3697ce..a3808a08b0074 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! Aggregate Function packages for [DataFusion]. //! diff --git a/datafusion/functions-array/src/array_has.rs b/datafusion/functions-array/src/array_has.rs index 136c6e7691207..bdda5a565947e 100644 --- a/datafusion/functions-array/src/array_has.rs +++ b/datafusion/functions-array/src/array_has.rs @@ -279,7 +279,7 @@ fn general_array_has_dispatch( let converter = RowConverter::new(vec![SortField::new(array.value_type())])?; - let element = sub_array.clone(); + let element = Arc::clone(sub_array); let sub_array = if comparison_type != ComparisonType::Single { as_generic_list_array::(sub_array)? } else { @@ -292,7 +292,7 @@ fn general_array_has_dispatch( let sub_arr_values = if comparison_type != ComparisonType::Single { converter.convert_columns(&[sub_arr])? } else { - converter.convert_columns(&[element.clone()])? + converter.convert_columns(&[Arc::clone(&element)])? 
}; let mut res = match comparison_type { diff --git a/datafusion/functions-array/src/concat.rs b/datafusion/functions-array/src/concat.rs index 330c50f5b055d..c52118d0a5e2b 100644 --- a/datafusion/functions-array/src/concat.rs +++ b/datafusion/functions-array/src/concat.rs @@ -249,7 +249,7 @@ pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result { return not_impl_err!("Array is not type '{base_type:?}'."); } if !base_type.eq(&DataType::Null) { - new_args.push(arg.clone()); + new_args.push(Arc::clone(arg)); } } diff --git a/datafusion/functions-array/src/flatten.rs b/datafusion/functions-array/src/flatten.rs index a495c3ade96f3..2b383af3d456f 100644 --- a/datafusion/functions-array/src/flatten.rs +++ b/datafusion/functions-array/src/flatten.rs @@ -77,7 +77,7 @@ impl ScalarUDFImpl for Flatten { get_base_type(field.data_type()) } Null | List(_) | LargeList(_) => Ok(data_type.to_owned()), - FixedSizeList(field, _) => Ok(List(field.clone())), + FixedSizeList(field, _) => Ok(List(Arc::clone(field))), _ => exec_err!( "Not reachable, data_type should be List, LargeList or FixedSizeList" ), @@ -115,7 +115,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let flattened_array = flatten_internal::(list_arr.clone(), None)?; Ok(Arc::new(flattened_array) as ArrayRef) } - Null => Ok(args[0].clone()), + Null => Ok(Arc::clone(&args[0])), _ => { exec_err!("flatten does not support type '{array_type:?}'") } diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 814127be806b1..9717d29883fd5 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! Array Functions for [DataFusion]. //! 
diff --git a/datafusion/functions-array/src/resize.rs b/datafusion/functions-array/src/resize.rs index 078ec7766aac8..83c545a26eb24 100644 --- a/datafusion/functions-array/src/resize.rs +++ b/datafusion/functions-array/src/resize.rs @@ -67,8 +67,8 @@ impl ScalarUDFImpl for ArrayResize { fn return_type(&self, arg_types: &[DataType]) -> Result { match &arg_types[0] { - List(field) | FixedSizeList(field, _) => Ok(List(field.clone())), - LargeList(field) => Ok(LargeList(field.clone())), + List(field) | FixedSizeList(field, _) => Ok(List(Arc::clone(field))), + LargeList(field) => Ok(LargeList(Arc::clone(field))), _ => exec_err!( "Not reachable, data_type should be List, LargeList or FixedSizeList" ), @@ -92,7 +92,7 @@ pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result { let new_len = as_int64_array(&arg[1])?; let new_element = if arg.len() == 3 { - Some(arg[2].clone()) + Some(Arc::clone(&arg[2])) } else { None }; @@ -168,7 +168,7 @@ fn general_list_resize>( let data = mutable.freeze(); Ok(Arc::new(GenericListArray::::try_new( - field.clone(), + Arc::clone(field), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), None, diff --git a/datafusion/functions-array/src/reverse.rs b/datafusion/functions-array/src/reverse.rs index b462be40209bc..581caf5daf2b8 100644 --- a/datafusion/functions-array/src/reverse.rs +++ b/datafusion/functions-array/src/reverse.rs @@ -93,7 +93,7 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result { let array = as_large_list_array(&arg[0])?; general_array_reverse::(array, field) } - Null => Ok(arg[0].clone()), + Null => Ok(Arc::clone(&arg[0])), array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), } } @@ -137,7 +137,7 @@ fn general_array_reverse>( let data = mutable.freeze(); Ok(Arc::new(GenericListArray::::try_new( - field.clone(), + Arc::clone(field), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), Some(nulls.into()), diff --git a/datafusion/functions-array/src/set_ops.rs b/datafusion/functions-array/src/set_ops.rs index a843a175f3a08..1de9c264ddc2c 100644 --- a/datafusion/functions-array/src/set_ops.rs +++ b/datafusion/functions-array/src/set_ops.rs @@ -213,7 +213,7 @@ fn array_distinct_inner(args: &[ArrayRef]) -> Result { // handle null if args[0].data_type() == &Null { - return Ok(args[0].clone()); + return Ok(Arc::clone(&args[0])); } // handle for list & largelist @@ -314,7 +314,7 @@ fn generic_set_lists( offsets.push(last_offset + OffsetSize::usize_as(rows.len())); let arrays = converter.convert_rows(rows)?; let array = match arrays.first() { - Some(array) => array.clone(), + Some(array) => Arc::clone(array), None => { return internal_err!("{set_op}: failed to get array from rows"); } @@ -370,12 +370,12 @@ fn general_set_op( (List(field), List(_)) => { let array1 = as_list_array(&array1)?; let array2 = as_list_array(&array2)?; - generic_set_lists::(array1, array2, field.clone(), set_op) + generic_set_lists::(array1, array2, Arc::clone(field), set_op) } (LargeList(field), LargeList(_)) => { let array1 = as_large_list_array(&array1)?; let array2 = as_large_list_array(&array2)?; - generic_set_lists::(array1, array2, field.clone(), set_op) + generic_set_lists::(array1, array2, Arc::clone(field), set_op) } (data_type1, data_type2) => { internal_err!( @@ -426,7 +426,7 @@ fn general_array_distinct( offsets.push(last_offset + OffsetSize::usize_as(rows.len())); let arrays = converter.convert_rows(rows)?; let array = match arrays.first() { - Some(array) => array.clone(), + Some(array) => 
Arc::clone(array), None => { return internal_err!("array_distinct: failed to get array from rows") } @@ -437,7 +437,7 @@ fn general_array_distinct( let new_arrays_ref = new_arrays.iter().map(|v| v.as_ref()).collect::>(); let values = compute::concat(&new_arrays_ref)?; Ok(Arc::new(GenericListArray::::try_new( - field.clone(), + Arc::clone(field), offsets, values, None, diff --git a/datafusion/functions-array/src/sort.rs b/datafusion/functions-array/src/sort.rs index c82dbd37be04d..9c1ae507636c9 100644 --- a/datafusion/functions-array/src/sort.rs +++ b/datafusion/functions-array/src/sort.rs @@ -121,7 +121,7 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result { let list_array = as_list_array(&args[0])?; let row_count = list_array.len(); if row_count == 0 { - return Ok(args[0].clone()); + return Ok(Arc::clone(&args[0])); } let mut array_lengths = vec![]; diff --git a/datafusion/functions-array/src/string.rs b/datafusion/functions-array/src/string.rs index d02c863db8b7e..2dc0a55e69519 100644 --- a/datafusion/functions-array/src/string.rs +++ b/datafusion/functions-array/src/string.rs @@ -381,7 +381,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let delimiter = delimiters[0].unwrap(); let s = compute_array_to_string( &mut arg, - arr.clone(), + Arc::clone(arr), delimiter.to_string(), null_string, with_null_string, diff --git a/datafusion/functions-array/src/utils.rs b/datafusion/functions-array/src/utils.rs index 3ecccf3c87137..f396c3b22581c 100644 --- a/datafusion/functions-array/src/utils.rs +++ b/datafusion/functions-array/src/utils.rs @@ -105,7 +105,7 @@ pub(crate) fn align_array_dimensions( .zip(args_ndim.iter()) .map(|(array, ndim)| { if ndim < max_ndim { - let mut aligned_array = array.clone(); + let mut aligned_array = Arc::clone(&array); for _ in 0..(max_ndim - ndim) { let data_type = aligned_array.data_type().to_owned(); let array_lengths = vec![1; aligned_array.len()]; @@ -120,7 +120,7 @@ pub(crate) fn align_array_dimensions( } Ok(aligned_array) } else { - Ok(array.clone()) + Ok(Arc::clone(&array)) } }) .collect(); @@ -277,10 +277,12 @@ mod tests { Some(vec![Some(6), Some(7), Some(8)]), ])); - let array2d_1 = - Arc::new(array_into_list_array_nullable(array1d_1.clone())) as ArrayRef; - let array2d_2 = - Arc::new(array_into_list_array_nullable(array1d_2.clone())) as ArrayRef; + let array2d_1 = Arc::new(array_into_list_array_nullable( + Arc::clone(&array1d_1) as ArrayRef + )) as ArrayRef; + let array2d_2 = Arc::new(array_into_list_array_nullable( + Arc::clone(&array1d_2) as ArrayRef + )) as ArrayRef; let res = align_array_dimensions::(vec![ array1d_1.to_owned(), diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index e7b00a6d540ad..91c46ac775a8b 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use arrow::array::ArrayRef; use arrow::util::bench_util::create_string_array_with_len; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use datafusion_common::ScalarValue; @@ -26,7 +27,7 @@ fn create_args(size: usize, str_len: usize) -> Vec { let array = Arc::new(create_string_array_with_len::(size, 0.2, str_len)); let scalar = ScalarValue::Utf8(Some(", ".to_string())); vec![ - ColumnarValue::Array(array.clone()), + ColumnarValue::Array(Arc::clone(&array) as ArrayRef), ColumnarValue::Scalar(scalar), ColumnarValue::Array(array), ] diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index da4882381e76f..23d57f38efae2 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -83,8 +83,12 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_like::(&[data.clone(), regex.clone(), flags.clone()]) - .expect("regexp_like should work on valid values"), + regexp_like::(&[ + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&flags), + ]) + .expect("regexp_like should work on valid values"), ) }) }); @@ -97,8 +101,12 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_match::(&[data.clone(), regex.clone(), flags.clone()]) - .expect("regexp_match should work on valid values"), + regexp_match::(&[ + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&flags), + ]) + .expect("regexp_match should work on valid values"), ) }) }); @@ -115,10 +123,10 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( regexp_replace::(&[ - data.clone(), - regex.clone(), - replacement.clone(), - flags.clone(), + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&replacement), + Arc::clone(&flags), ]) .expect("regexp_replace should work on valid values"), ) diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index b76da15c52ca1..2c2e36b91b13a 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -26,6 +26,7 @@ use datafusion_common::{ use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::Arc; #[derive(Debug)] pub struct GetFieldFunc { @@ -151,7 +152,7 @@ impl ScalarUDFImpl for GetFieldFunc { } let arrays = ColumnarValue::values_to_arrays(args)?; - let array = arrays[0].clone(); + let array = Arc::clone(&arrays[0]); let name = match &args[1] { ColumnarValue::Scalar(name) => name, @@ -199,7 +200,7 @@ impl ScalarUDFImpl for GetFieldFunc { let as_struct_array = as_struct_array(&array)?; match as_struct_array.column_by_name(k) { None => exec_err!("get indexed field {k} not found in struct"), - Some(col) => Ok(ColumnarValue::Array(col.clone())), + Some(col) => Ok(ColumnarValue::Array(Arc::clone(col))), } } (DataType::Struct(_), name) => exec_err!( diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index 6626831c8034f..1834c7ac6060f 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -93,9 +93,9 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { match columnar_value { ColumnarValue::Scalar(value) => match value { - ScalarValue::List(array) => Ok(array.value(0).clone()), - ScalarValue::LargeList(array) => Ok(array.value(0).clone()), - ScalarValue::FixedSizeList(array) => Ok(array.value(0).clone()), + 
ScalarValue::List(array) => Ok(array.value(0)), + ScalarValue::LargeList(array) => Ok(array.value(0)), + ScalarValue::FixedSizeList(array) => Ok(array.value(0)), _ => exec_err!("Expected array, got {:?}", value), }, ColumnarValue::Array(array) => exec_err!("Expected scalar, got {:?}", array), diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 05515c6e925c8..a09224acefcdf 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -21,6 +21,7 @@ use arrow::compute::kernels::zip::zip; use arrow::datatypes::DataType; use datafusion_common::{internal_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::sync::Arc; #[derive(Debug)] pub struct NVLFunc { @@ -101,13 +102,13 @@ fn nvl_func(args: &[ColumnarValue]) -> Result { } let (lhs_array, rhs_array) = match (&args[0], &args[1]) { (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => { - (lhs.clone(), rhs.to_array_of_size(lhs.len())?) + (Arc::clone(lhs), rhs.to_array_of_size(lhs.len())?) } (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => { - (lhs.clone(), rhs.clone()) + (Arc::clone(lhs), Arc::clone(rhs)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { - (lhs.to_array_of_size(rhs.len())?, rhs.clone()) + (lhs.to_array_of_size(rhs.len())?, Arc::clone(rhs)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => { let mut current_value = lhs; diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index 573ac72425fb4..1144dc0fb7c56 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -24,6 +24,7 @@ use datafusion_expr::{ type_coercion::binary::comparison_coercion, ColumnarValue, ScalarUDFImpl, Signature, Volatility, }; +use std::sync::Arc; #[derive(Debug)] pub struct NVL2Func { @@ -112,7 +113,7 @@ fn nvl2_func(args: &[ColumnarValue]) -> Result { .iter() .map(|arg| match arg { ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(len), - ColumnarValue::Array(array) => Ok(array.clone()), + ColumnarValue::Array(array) => Ok(Arc::clone(array)), }) .collect::>>()?; let to_apply = is_not_null(&args[0])?; diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 9d4b2e4a0b8b6..c3dee8b1ccb40 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -40,7 +40,7 @@ fn array_struct(args: &[ArrayRef]) -> Result { arg.data_type().clone(), true, )), - arg.clone(), + Arc::clone(arg), )) }) .collect::>>()?; @@ -121,30 +121,21 @@ mod tests { as_struct_array(&struc).expect("failed to initialize function struct"); assert_eq!( &Int64Array::from(vec![1]), - result - .column_by_name("c0") - .unwrap() - .clone() + Arc::clone(result.column_by_name("c0").unwrap()) .as_any() .downcast_ref::() .unwrap() ); assert_eq!( &Int64Array::from(vec![2]), - result - .column_by_name("c1") - .unwrap() - .clone() + Arc::clone(result.column_by_name("c1").unwrap()) .as_any() .downcast_ref::() .unwrap() ); assert_eq!( &Int64Array::from(vec![3]), - result - .column_by_name("c2") - .unwrap() - .clone() + Arc::clone(result.column_by_name("c2").unwrap()) .as_any() .downcast_ref::() .unwrap() diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 4906cdc9601d3..e1efb4811ec0d 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -123,7 +123,7 @@ impl 
ScalarUDFImpl for DatePartFunc { let is_scalar = matches!(array, ColumnarValue::Scalar(_)); let array = match array { - ColumnarValue::Array(array) => array.clone(), + ColumnarValue::Array(array) => Arc::clone(array), ColumnarValue::Scalar(scalar) => scalar.to_array()?, }; diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 4cb91447f3867..cbb6f37603d27 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -16,6 +16,7 @@ // under the License. use std::any::Any; +use std::sync::Arc; use arrow::datatypes::DataType::Timestamp; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; @@ -387,7 +388,7 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { /// the timezone if it exists. fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType { match arg { - Timestamp(_, Some(tz)) => Timestamp(unit, Some(tz.clone())), + Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))), _ => Timestamp(unit, None), } } @@ -794,10 +795,10 @@ mod tests { Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef; let arrays = &[ - ColumnarValue::Array(nanos_timestamps.clone()), - ColumnarValue::Array(millis_timestamps.clone()), - ColumnarValue::Array(micros_timestamps.clone()), - ColumnarValue::Array(sec_timestamps.clone()), + ColumnarValue::Array(Arc::clone(&nanos_timestamps)), + ColumnarValue::Array(Arc::clone(&millis_timestamps)), + ColumnarValue::Array(Arc::clone(µs_timestamps)), + ColumnarValue::Array(Arc::clone(&sec_timestamps)), ]; for udf in &udfs { @@ -836,11 +837,11 @@ mod tests { let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef; let arrays = &[ - ColumnarValue::Array(nanos_timestamps.clone()), - ColumnarValue::Array(millis_timestamps.clone()), - ColumnarValue::Array(micros_timestamps.clone()), - ColumnarValue::Array(sec_timestamps.clone()), - ColumnarValue::Array(i64_timestamps.clone()), + ColumnarValue::Array(Arc::clone(&nanos_timestamps)), + ColumnarValue::Array(Arc::clone(&millis_timestamps)), + ColumnarValue::Array(Arc::clone(µs_timestamps)), + ColumnarValue::Array(Arc::clone(&sec_timestamps)), + ColumnarValue::Array(Arc::clone(&i64_timestamps)), ]; for udf in &udfs { diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index 433a4f90d95b7..b1c55c843f71d 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! Function packages for [DataFusion]. //! 
diff --git a/datafusion/functions/src/math/abs.rs b/datafusion/functions/src/math/abs.rs index 6d07b14f866e3..f7a17f0caf947 100644 --- a/datafusion/functions/src/math/abs.rs +++ b/datafusion/functions/src/math/abs.rs @@ -91,7 +91,7 @@ fn create_abs_function(input_data_type: &DataType) -> Result | DataType::UInt8 | DataType::UInt16 | DataType::UInt32 - | DataType::UInt64 => Ok(|args: &Vec| Ok(args[0].clone())), + | DataType::UInt64 => Ok(|args: &Vec| Ok(Arc::clone(&args[0]))), // Decimal types DataType::Decimal128(_, _) => Ok(make_decimal_abs_function!(Decimal128Array)), diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 0791561539e1e..ea424c14749e8 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -109,7 +109,7 @@ impl ScalarUDFImpl for LogFunc { let mut x = &args[0]; if args.len() == 2 { x = &args[1]; - base = ColumnarValue::Array(args[0].clone()); + base = ColumnarValue::Array(Arc::clone(&args[0])); } // note in f64::log params order is different than in sql. e.g in sql log(base, x) == f64::log(x, base) let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index 71ab7c1b43502..89554a76febba 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -111,7 +111,7 @@ pub fn round(args: &[ArrayRef]) -> Result { let mut decimal_places = ColumnarValue::Scalar(ScalarValue::Int64(Some(0))); if args.len() == 2 { - decimal_places = ColumnarValue::Array(args[1].clone()); + decimal_places = ColumnarValue::Array(Arc::clone(&args[1])); } match args[0].data_type() { diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index f980e583365f7..3344438454c4b 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -117,7 +117,7 @@ fn trunc(args: &[ArrayRef]) -> Result { let precision = if args.len() == 1 { ColumnarValue::Scalar(Int64(Some(0))) } else { - ColumnarValue::Array(args[1].clone()) + ColumnarValue::Array(Arc::clone(&args[1])) }; match args[0].data_type() { From 2837e02b7ec7dfbca576451e63db25b84ed2c97d Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Tue, 16 Jul 2024 13:23:48 +0300 Subject: [PATCH 056/357] minor: split repartition time and send time metrics (#11440) --- datafusion/physical-plan/src/repartition/mod.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 3d4d3058393e6..e5c506403ff66 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -261,6 +261,7 @@ impl BatchPartitioner { num_partitions: partitions, hash_buffer, } => { + // Tracking time required for distributing indexes across output partitions let timer = self.timer.timer(); let arrays = exprs @@ -282,6 +283,11 @@ impl BatchPartitioner { .append_value(index as u64); } + // Finished building index-arrays for output partitions + timer.done(); + + // Borrowing partitioner timer to prevent moving `self` to closure + let partitioner_timer = &self.timer; let it = indices .into_iter() .enumerate() @@ -290,6 +296,9 @@ impl BatchPartitioner { (!indices.is_empty()).then_some((partition, indices)) }) .map(move |(partition, indices)| { + // Tracking time required for repartitioned batches construction + let _timer = partitioner_timer.timer(); + // 
Produce batches based on indices
                        let columns = batch
                            .columns()
@@ -303,9 +312,6 @@ impl BatchPartitioner {
                         let batch =
                             RecordBatch::try_new(batch.schema(), columns).unwrap();
 
-                        // bind timer so it drops w/ this iterator
-                        let _ = &timer;
-
                         Ok((partition, batch))
                     });
 
From 133128840ca3dbea200dcfe84050cb7b82bf94a8 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 16 Jul 2024 07:19:25 -0400
Subject: [PATCH 057/357] Docs: Document creating new extension APIs (#11425)

* Docs: Document creating new extension APIs

* fix

* Add clarification about extension APIs. Thanks @ozankabak

* Apply suggestions from code review

Co-authored-by: Mehmet Ozan Kabak

* Add a paragraph on datafusion-contrib

* prettier

---------

Co-authored-by: Mehmet Ozan Kabak
---
 datafusion/core/src/lib.rs                    |  2 +-
 docs/source/contributor-guide/architecture.md | 74 +++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 63dbe824c2314..81c1c4629a3ad 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -174,7 +174,7 @@
 //!
 //! DataFusion is designed to be highly extensible, so you can
 //! start with a working, full featured engine, and then
-//! specialize any behavior for their usecase. For example,
+//! specialize any behavior for your usecase. For example,
 //! some projects may add custom [`ExecutionPlan`] operators, or create their own
 //! query language that directly creates [`LogicalPlan`] rather than using the
 //! built in SQL planner, [`SqlToRel`].
diff --git a/docs/source/contributor-guide/architecture.md b/docs/source/contributor-guide/architecture.md
index 68541f8777689..55c8a1d980df5 100644
--- a/docs/source/contributor-guide/architecture.md
+++ b/docs/source/contributor-guide/architecture.md
@@ -25,3 +25,77 @@ possible. You can find the most up to date version in the [source code].
 
 [crates.io documentation]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture
 [source code]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/lib.rs
+
+## Forks vs Extension APIs
+
+DataFusion is a fast moving project, which results in frequent internal changes.
+This benefits DataFusion by allowing it to evolve and respond quickly to
+requests, but also means that maintaining a fork with major modifications
+sometimes requires non trivial work.
+
+The public API (what is accessible if you use the DataFusion releases from
+crates.io) is typically much more stable (though it does change from release to
+release as well).
+
+Thus, rather than forks, we recommend using one of the many extension APIs (such
+as `TableProvider`, `OptimizerRule`, or `ExecutionPlan`) to customize
+DataFusion. If you can not do what you want with the existing APIs, we would
+welcome you working with us to add new APIs to enable your use case, as
+described in the next section.
+
+## `datafusion-contrib`
+
+While DataFusion comes with enough features "out of the box" to quickly start
+with a working system, it can't include every useful feature (e.g.
+`TableProvider`s for all data formats). The [`datafusion-contrib`] project
+contains a collection of community maintained extensions that are not part of
+the core DataFusion project, and not under Apache Software Foundation governance
+but may be useful to others in the community. If you are interested in adding a
+feature to DataFusion, a new extension in `datafusion-contrib` is likely a good
+place to start. Please [contact] us via github issue, slack, or Discord and
+we'll gladly set up a new repository for your extension.
+
+[`datafusion-contrib`]: https://github.com/datafusion-contrib
+[contact]: ../contributor-guide/communication.md
+
+## Creating new Extension APIs
+
+DataFusion aims to be a general-purpose query engine, and thus the core crates
+contain features that are useful for a wide range of use cases. Use case specific
+functionality (such as very specific time series or stream processing features)
+are typically implemented using the extension APIs.
+
+If you have a use case that is not covered by the existing APIs, we would love to
+work with you to design a new general purpose API. There are often others who are
+interested in similar extensions and the act of defining the API often improves
+the code overall for everyone.
+
+Extension APIs that provide "safe" default behaviors are more likely to be
+suitable for inclusion in DataFusion, while APIs that require major changes to
+built-in operators are less likely. For example, it might make less sense
+to add an API to support a stream processing feature if that would result in
+slower performance for built-in operators. It may still make sense to add
+extension APIs for such features, but leave implementation of such operators in
+downstream projects.
+
+The process to create a new extension API is typically:
+
+- Look for an existing issue describing what you want to do, and file one if it
+  doesn't yet exist.
+- Discuss what the API would look like. Feel free to ask contributors (via `@`
+  mentions) for feedback (you can find such people by looking at the most
+  recently changed PRs and issues)
+- Prototype the new API, typically by adding an example (in
+  `datafusion-examples` or refactoring existing code) to show how it would work
+- Create a PR with the new API, and work with the community to get it merged
+
+Some benefits of using an example based approach are
+
+- Any future API changes will also keep your example going, ensuring no
+  regression in functionality
+- There will be a blueprint of any needed changes to your code if the APIs do change
+  (just look at what changed in your example)
+
+An example of this process was [creating a SQL Extension Planning API].
+ +[creating a sql extension planning api]: https://github.com/apache/datafusion/issues/11207 From 5f0993cf58a1c004c88120eea974554666332213 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Jul 2024 10:15:36 -0400 Subject: [PATCH 058/357] Minor: rename `row_groups.rs` to `row_group_filter.rs` (#11481) --- datafusion/core/src/datasource/physical_plan/parquet/mod.rs | 2 +- datafusion/core/src/datasource/physical_plan/parquet/opener.rs | 2 +- .../parquet/{row_groups.rs => row_group_filter.rs} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename datafusion/core/src/datasource/physical_plan/parquet/{row_groups.rs => row_group_filter.rs} (100%) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 9d5c64719e759..ed0fc5f0169ee 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -53,7 +53,7 @@ mod opener; mod page_filter; mod reader; mod row_filter; -mod row_groups; +mod row_group_filter; mod statistics; mod writer; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index 36335863032c1..c97b0282626a7 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -18,7 +18,7 @@ //! [`ParquetOpener`] for opening Parquet files use crate::datasource::physical_plan::parquet::page_filter::PagePruningPredicate; -use crate::datasource::physical_plan::parquet::row_groups::RowGroupAccessPlanFilter; +use crate::datasource::physical_plan::parquet::row_group_filter::RowGroupAccessPlanFilter; use crate::datasource::physical_plan::parquet::{ row_filter, should_enable_page_index, ParquetAccessPlan, }; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs similarity index 100% rename from datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs rename to datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs From 55b792a608be881439fd20dafdd803478346186a Mon Sep 17 00:00:00 2001 From: yfu Date: Wed, 17 Jul 2024 00:17:32 +1000 Subject: [PATCH 059/357] Support alternate formats for unparsing `datetime` to `timestamp` and `interval` (#11466) * Unparser rule for datatime cast (#10) * use timestamp as the identifier for date64 * rename * implement CustomDialectBuilder * fix * dialect with interval style (#11) --------- Co-authored-by: Phillip LeBlanc * fmt * clippy * doc * Update datafusion/sql/src/unparser/expr.rs Co-authored-by: Andrew Lamb * update the doc for CustomDialectBuilder * fix doc test --------- Co-authored-by: Phillip LeBlanc Co-authored-by: Andrew Lamb --- datafusion-examples/examples/plan_to_sql.rs | 6 +- datafusion/sql/src/unparser/dialect.rs | 140 ++++++++ datafusion/sql/src/unparser/expr.rs | 339 ++++++++++++++++---- 3 files changed, 420 insertions(+), 65 deletions(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index f719a33fb6249..8ea7c2951223d 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -19,7 +19,7 @@ use datafusion::error::Result; use datafusion::prelude::*; use datafusion::sql::unparser::expr_to_sql; -use datafusion_sql::unparser::dialect::CustomDialect; +use 
datafusion_sql::unparser::dialect::CustomDialectBuilder; use datafusion_sql::unparser::{plan_to_sql, Unparser}; /// This example demonstrates the programmatic construction of SQL strings using @@ -80,7 +80,9 @@ fn simple_expr_to_pretty_sql_demo() -> Result<()> { /// using a custom dialect and an explicit unparser fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> { let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8))); - let dialect = CustomDialect::new(Some('`')); + let dialect = CustomDialectBuilder::new() + .with_identifier_quote_style('`') + .build(); let unparser = Unparser::new(&dialect); let sql = unparser.expr_to_sql(&expr)?.to_string(); assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#); diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index e8cbde0585666..eca2eb4fd0ec7 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -35,7 +35,33 @@ pub trait Dialect { fn supports_nulls_first_in_sort(&self) -> bool { true } + + // Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME? + // E.g. Trino, Athena and Dremio does not have DATETIME data type + fn use_timestamp_for_date64(&self) -> bool { + false + } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::PostgresVerbose + } } + +/// `IntervalStyle` to use for unparsing +/// +/// +/// different DBMS follows different standards, popular ones are: +/// postgres_verbose: '2 years 15 months 100 weeks 99 hours 123456789 milliseconds' which is +/// compatible with arrow display format, as well as duckdb +/// sql standard format is '1-2' for year-month, or '1 10:10:10.123456' for day-time +/// +#[derive(Clone, Copy)] +pub enum IntervalStyle { + PostgresVerbose, + SQLStandard, + MySQL, +} + pub struct DefaultDialect {} impl Dialect for DefaultDialect { @@ -57,6 +83,10 @@ impl Dialect for PostgreSqlDialect { fn identifier_quote_style(&self, _: &str) -> Option { Some('"') } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::PostgresVerbose + } } pub struct MySqlDialect {} @@ -69,6 +99,10 @@ impl Dialect for MySqlDialect { fn supports_nulls_first_in_sort(&self) -> bool { false } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::MySQL + } } pub struct SqliteDialect {} @@ -81,12 +115,29 @@ impl Dialect for SqliteDialect { pub struct CustomDialect { identifier_quote_style: Option, + supports_nulls_first_in_sort: bool, + use_timestamp_for_date64: bool, + interval_style: IntervalStyle, +} + +impl Default for CustomDialect { + fn default() -> Self { + Self { + identifier_quote_style: None, + supports_nulls_first_in_sort: true, + use_timestamp_for_date64: false, + interval_style: IntervalStyle::SQLStandard, + } + } } impl CustomDialect { + // create a CustomDialect + #[deprecated(note = "please use `CustomDialectBuilder` instead")] pub fn new(identifier_quote_style: Option) -> Self { Self { identifier_quote_style, + ..Default::default() } } } @@ -95,4 +146,93 @@ impl Dialect for CustomDialect { fn identifier_quote_style(&self, _: &str) -> Option { self.identifier_quote_style } + + fn supports_nulls_first_in_sort(&self) -> bool { + self.supports_nulls_first_in_sort + } + + fn use_timestamp_for_date64(&self) -> bool { + self.use_timestamp_for_date64 + } + + fn interval_style(&self) -> IntervalStyle { + self.interval_style + } +} + +/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern +/// +/// +/// # Examples +/// +/// Building a custom dialect with all default options set in 
CustomDialectBuilder::new() +/// but with `use_timestamp_for_date64` overridden to `true` +/// +/// ``` +/// use datafusion_sql::unparser::dialect::CustomDialectBuilder; +/// let dialect = CustomDialectBuilder::new() +/// .with_use_timestamp_for_date64(true) +/// .build(); +/// ``` +pub struct CustomDialectBuilder { + identifier_quote_style: Option, + supports_nulls_first_in_sort: bool, + use_timestamp_for_date64: bool, + interval_style: IntervalStyle, +} + +impl Default for CustomDialectBuilder { + fn default() -> Self { + Self::new() + } +} + +impl CustomDialectBuilder { + pub fn new() -> Self { + Self { + identifier_quote_style: None, + supports_nulls_first_in_sort: true, + use_timestamp_for_date64: false, + interval_style: IntervalStyle::PostgresVerbose, + } + } + + pub fn build(self) -> CustomDialect { + CustomDialect { + identifier_quote_style: self.identifier_quote_style, + supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, + use_timestamp_for_date64: self.use_timestamp_for_date64, + interval_style: self.interval_style, + } + } + + /// Customize the dialect with a specific identifier quote style, e.g. '`', '"' + pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self { + self.identifier_quote_style = Some(identifier_quote_style); + self + } + + /// Customize the dialect to supports `NULLS FIRST` in `ORDER BY` clauses + pub fn with_supports_nulls_first_in_sort( + mut self, + supports_nulls_first_in_sort: bool, + ) -> Self { + self.supports_nulls_first_in_sort = supports_nulls_first_in_sort; + self + } + + /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME + pub fn with_use_timestamp_for_date64( + mut self, + use_timestamp_for_date64: bool, + ) -> Self { + self.use_timestamp_for_date64 = use_timestamp_for_date64; + self + } + + /// Customize the dialect with a specific interval style listed in `IntervalStyle` + pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self { + self.interval_style = interval_style; + self + } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index eb149c819c8b0..6b7775ee3d4db 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -43,6 +43,7 @@ use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use super::dialect::IntervalStyle; use super::Unparser; /// DataFusion's Exprs can represent either an `Expr` or an `OrderByExpr` @@ -541,6 +542,14 @@ impl Unparser<'_> { } } + fn ast_type_for_date64_in_cast(&self) -> ast::DataType { + if self.dialect.use_timestamp_for_date64() { + ast::DataType::Timestamp(None, ast::TimezoneInfo::None) + } else { + ast::DataType::Datetime(None) + } + } + fn col_to_sql(&self, col: &Column) -> Result { if let Some(table_ref) = &col.relation { let mut id = table_ref.to_vec(); @@ -1003,7 +1012,7 @@ impl Unparser<'_> { expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( datetime.to_string(), ))), - data_type: ast::DataType::Datetime(None), + data_type: self.ast_type_for_date64_in_cast(), format: None, }) } @@ -1055,22 +1064,7 @@ impl Unparser<'_> { ScalarValue::IntervalYearMonth(Some(_)) | ScalarValue::IntervalDayTime(Some(_)) | ScalarValue::IntervalMonthDayNano(Some(_)) => { - let wrap_array = v.to_array()?; - let Some(result) = array_value_to_string(&wrap_array, 0).ok() else { - return internal_err!( - "Unable to convert interval scalar value to string" - ); - }; - let interval = Interval { - value: 
Box::new(ast::Expr::Value(SingleQuotedString( - result.to_uppercase(), - ))), - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }; - Ok(ast::Expr::Interval(interval)) + self.interval_scalar_to_sql(v) } ScalarValue::IntervalYearMonth(None) => { Ok(ast::Expr::Value(ast::Value::Null)) @@ -1108,6 +1102,123 @@ impl Unparser<'_> { } } + fn interval_scalar_to_sql(&self, v: &ScalarValue) -> Result { + match self.dialect.interval_style() { + IntervalStyle::PostgresVerbose => { + let wrap_array = v.to_array()?; + let Some(result) = array_value_to_string(&wrap_array, 0).ok() else { + return internal_err!( + "Unable to convert interval scalar value to string" + ); + }; + let interval = Interval { + value: Box::new(ast::Expr::Value(SingleQuotedString( + result.to_uppercase(), + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + // If the interval standard is SQLStandard, implement a simple unparse logic + IntervalStyle::SQLStandard => match v { + ScalarValue::IntervalYearMonth(v) => { + let Some(v) = v else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(v.to_string()), + )), + leading_field: Some(ast::DateTimeField::Month), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + ScalarValue::IntervalDayTime(v) => { + let Some(v) = v else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + let days = v.days; + let secs = v.milliseconds / 1_000; + let mins = secs / 60; + let hours = mins / 60; + + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + + let millis = v.milliseconds % 1_000; + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(format!( + "{days} {hours}:{mins}:{secs}.{millis:3}" + )), + )), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: Some(ast::DateTimeField::Second), + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + ScalarValue::IntervalMonthDayNano(v) => { + let Some(v) = v else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + + if v.months >= 0 && v.days == 0 && v.nanoseconds == 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(v.months.to_string()), + )), + leading_field: Some(ast::DateTimeField::Month), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } else if v.months == 0 + && v.days >= 0 + && v.nanoseconds % 1_000_000 == 0 + { + let days = v.days; + let secs = v.nanoseconds / 1_000_000_000; + let mins = secs / 60; + let hours = mins / 60; + + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + + let millis = (v.nanoseconds % 1_000_000_000) / 1_000_000; + + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(format!( + "{days} {hours}:{mins}:{secs}.{millis:03}" + )), + )), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: Some(ast::DateTimeField::Second), + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } else { + not_impl_err!("Unsupported IntervalMonthDayNano scalar with both Month and DayTime for IntervalStyle::SQLStandard") + } + } 
+ _ => Ok(ast::Expr::Value(ast::Value::Null)), + }, + IntervalStyle::MySQL => { + not_impl_err!("Unsupported interval scalar for IntervalStyle::MySQL") + } + } + } + fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result { match data_type { DataType::Null => { @@ -1136,7 +1247,7 @@ impl Unparser<'_> { Ok(ast::DataType::Timestamp(None, tz_info)) } DataType::Date32 => Ok(ast::DataType::Date), - DataType::Date64 => Ok(ast::DataType::Datetime(None)), + DataType::Date64 => Ok(self.ast_type_for_date64_in_cast()), DataType::Time32(_) => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } @@ -1232,7 +1343,7 @@ mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; - use crate::unparser::dialect::CustomDialect; + use crate::unparser::dialect::{CustomDialect, CustomDialectBuilder}; use super::*; @@ -1595,46 +1706,7 @@ mod tests { ), (col("need-quoted").eq(lit(1)), r#"("need-quoted" = 1)"#), (col("need quoted").eq(lit(1)), r#"("need quoted" = 1)"#), - ( - interval_month_day_nano_lit( - "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", - ), - r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("1.5 MONTH"), - r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("-3 MONTH"), - r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("1 MONTH") - .add(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, - ), - ( - interval_month_day_nano_lit("1 MONTH") - .sub(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, - ), - ( - interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"), - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, - ), - ( - interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, - ), - ( - interval_year_month_lit("1 YEAR 1 MONTH"), - r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, - ), - ( - interval_year_month_lit("1.5 YEAR 1 MONTH"), - r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, - ), + // See test_interval_scalar_to_expr for interval literals ( (col("a") + col("b")).gt(Expr::Literal(ScalarValue::Decimal128( Some(100123), @@ -1690,8 +1762,10 @@ mod tests { } #[test] - fn custom_dialect() -> Result<()> { - let dialect = CustomDialect::new(Some('\'')); + fn custom_dialect_with_identifier_quote_style() -> Result<()> { + let dialect = CustomDialectBuilder::new() + .with_identifier_quote_style('\'') + .build(); let unparser = Unparser::new(&dialect); let expr = col("a").gt(lit(4)); @@ -1706,8 +1780,8 @@ mod tests { } #[test] - fn custom_dialect_none() -> Result<()> { - let dialect = CustomDialect::new(None); + fn custom_dialect_without_identifier_quote_style() -> Result<()> { + let dialect = CustomDialect::default(); let unparser = Unparser::new(&dialect); let expr = col("a").gt(lit(4)); @@ -1720,4 +1794,143 @@ mod tests { Ok(()) } + + #[test] + fn custom_dialect_use_timestamp_for_date64() -> Result<()> { + for (use_timestamp_for_date64, identifier) in + [(false, "DATETIME"), (true, "TIMESTAMP")] + { + let dialect = CustomDialectBuilder::new() + 
.with_use_timestamp_for_date64(use_timestamp_for_date64) + .build(); + let unparser = Unparser::new(&dialect); + + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Date64, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + let expected = format!(r#"CAST(a AS {identifier})"#); + assert_eq!(actual, expected); + } + Ok(()) + } + + #[test] + fn customer_dialect_support_nulls_first_in_ort() -> Result<()> { + let tests: Vec<(Expr, &str, bool)> = vec![ + (col("a").sort(true, true), r#"a ASC NULLS FIRST"#, true), + (col("a").sort(true, true), r#"a ASC"#, false), + ]; + + for (expr, expected, supports_nulls_first_in_sort) in tests { + let dialect = CustomDialectBuilder::new() + .with_supports_nulls_first_in_sort(supports_nulls_first_in_sort) + .build(); + let unparser = Unparser::new(&dialect); + let ast = unparser.expr_to_unparsed(&expr)?; + + let actual = format!("{}", ast); + + assert_eq!(actual, expected); + } + + Ok(()) + } + + #[test] + fn test_interval_scalar_to_expr() { + let tests = [ + ( + interval_month_day_nano_lit("1 MONTH"), + IntervalStyle::SQLStandard, + "INTERVAL '1' MONTH", + ), + ( + interval_month_day_nano_lit("1.5 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:0:0.000' DAY TO SECOND", + ), + ( + interval_month_day_nano_lit("1.51234 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:17:46.176' DAY TO SECOND", + ), + ( + interval_datetime_lit("1.51234 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:17:46.176' DAY TO SECOND", + ), + ( + interval_year_month_lit("1 YEAR"), + IntervalStyle::SQLStandard, + "INTERVAL '12' MONTH", + ), + ( + interval_month_day_nano_lit( + "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", + ), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("1.5 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("-3 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .add(interval_month_day_nano_lit("1 DAY")), + IntervalStyle::PostgresVerbose, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .sub(interval_month_day_nano_lit("1 DAY")), + IntervalStyle::PostgresVerbose, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, + ), + ( + interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, + ), + ( + interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, + ), + ( + interval_year_month_lit("1 YEAR 1 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + ), + ( + interval_year_month_lit("1.5 YEAR 1 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + ), + ]; + + for (value, style, expected) in tests { + let dialect = CustomDialectBuilder::new() + .with_interval_style(style) + .build(); + let unparser = 
Unparser::new(&dialect); + + let ast = unparser.expr_to_sql(&value).expect("to be unparsed"); + + let actual = format!("{ast}"); + + assert_eq!(actual, expected); + } + } } From f11bdf08b2fea5465d3b120dce4e49c7d0ff45ae Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 16 Jul 2024 08:50:09 -0600 Subject: [PATCH 060/357] add criterion benchmark for CaseExpr (#11482) --- datafusion/physical-expr/Cargo.toml | 4 + datafusion/physical-expr/benches/case_when.rs | 94 +++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 datafusion/physical-expr/benches/case_when.rs diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index d8dbe636d90cf..067617a697a98 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -77,3 +77,7 @@ tokio = { workspace = true, features = ["rt-multi-thread"] } [[bench]] harness = false name = "in_list" + +[[bench]] +harness = false +name = "case_when" diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs new file mode 100644 index 0000000000000..9cc7bdc465fb5 --- /dev/null +++ b/datafusion/physical-expr/benches/case_when.rs @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow::datatypes::{Field, Schema}; +use arrow::record_batch::RecordBatch; +use arrow_array::builder::{Int32Builder, StringBuilder}; +use arrow_schema::DataType; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::ScalarValue; +use datafusion_expr::Operator; +use datafusion_physical_expr::expressions::{BinaryExpr, CaseExpr}; +use datafusion_physical_expr_common::expressions::column::Column; +use datafusion_physical_expr_common::expressions::Literal; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use std::sync::Arc; + +fn make_col(name: &str, index: usize) -> Arc { + Arc::new(Column::new(name, index)) +} + +fn make_lit_i32(n: i32) -> Arc { + Arc::new(Literal::new(ScalarValue::Int32(Some(n)))) +} + +fn criterion_benchmark(c: &mut Criterion) { + // create input data + let mut c1 = Int32Builder::new(); + let mut c2 = StringBuilder::new(); + for i in 0..1000 { + c1.append_value(i); + if i % 7 == 0 { + c2.append_null(); + } else { + c2.append_value(&format!("string {i}")); + } + } + let c1 = Arc::new(c1.finish()); + let c2 = Arc::new(c2.finish()); + let schema = Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Utf8, true), + ]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2]).unwrap(); + + // use same predicate for all benchmarks + let predicate = Arc::new(BinaryExpr::new( + make_col("c1", 0), + Operator::LtEq, + make_lit_i32(500), + )); + + // CASE WHEN expr THEN 1 ELSE 0 END + c.bench_function("case_when: scalar or scalar", |b| { + let expr = Arc::new( + CaseExpr::try_new( + None, + vec![(predicate.clone(), make_lit_i32(1))], + Some(make_lit_i32(0)), + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + // CASE WHEN expr THEN col ELSE null END + c.bench_function("case_when: column or null", |b| { + let expr = Arc::new( + CaseExpr::try_new( + None, + vec![(predicate.clone(), make_col("c2", 1))], + Some(Arc::new(Literal::new(ScalarValue::Utf8(None)))), + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From ccb4baf0fc6b4dee983bb29f2282b9c19510a481 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Jul 2024 15:52:35 -0400 Subject: [PATCH 061/357] Initial support for `StringView`, merge changes from `string-view` development branch (#11402) * Update `string-view` branch to arrow-rs main (#10966) * Pin to arrow main * Fix clippy with latest arrow * Uncomment test that needs new arrow-rs to work * Update datafusion-cli Cargo.lock * Update Cargo.lock * tapelo * feat: Implement equality = and inequality <> support for StringView (#10985) * feat: Implement equality = and inequality <> support for StringView * chore: Add tests for the StringView * chore * chore: Update tests for NULL * fix: Used build_array_string! 
* chore: Update string_coercion function to handle Utf8View type in binary.rs * chore: add tests * chore: ci * Add more StringView comparison test coverage (#10997) * Add more StringView comparison test coverage * add reference * Add another test showing casting on columns works correctly * feat: Implement equality = and inequality <> support for BinaryView (#11004) * feat: Implement equality = and inequality <> support for BinaryView Signed-off-by: Chojan Shang * chore: make fmt happy Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang * Implement support for LargeString and LargeBinary for StringView and BinaryView (#11034) * implement large binary * add tests for large string * better comments for string coercion * Improve filter predicates with `Utf8View` literals (#11043) * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Improve type coercion logic in TypeCoercionRewriter * chore * chore: Update test * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Remove unused import and update code formatting in unwrap_cast_in_comparison.rs * Remove arrow-patch --------- Signed-off-by: Chojan Shang Co-authored-by: Alex Huang Co-authored-by: Chojan Shang Co-authored-by: Xiangpeng Hao --- datafusion/common/src/scalar/mod.rs | 8 +- datafusion/expr/src/type_coercion/binary.rs | 36 +- .../src/unwrap_cast_in_comparison.rs | 26 +- .../sqllogictest/test_files/binary_view.slt | 202 +++++++++++ .../sqllogictest/test_files/string_view.slt | 326 ++++++++++++++++++ 5 files changed, 566 insertions(+), 32 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/binary_view.slt create mode 100644 datafusion/sqllogictest/test_files/string_view.slt diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index c891e85aa59bb..38f70e4c1466c 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1682,8 +1682,10 @@ impl ScalarValue { DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16), DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32), DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64), + DataType::Utf8View => build_array_string!(StringViewArray, Utf8View), DataType::Utf8 => build_array_string!(StringArray, Utf8), DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8), + DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView), DataType::Binary => build_array_string!(BinaryArray, Binary), DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary), DataType::Date32 => build_array_primitive!(Date32Array, Date32), @@ -1841,8 +1843,6 @@ impl ScalarValue { | DataType::Time64(TimeUnit::Millisecond) | DataType::Map(_, _) | DataType::RunEndEncoded(_, _) - | DataType::Utf8View - | DataType::BinaryView | DataType::ListView(_) | DataType::LargeListView(_) => { return _internal_err!( @@ -5695,16 +5695,12 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), ); - // needs https://github.com/apache/arrow-rs/issues/5893 - /* check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View); check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View); check_scalar_cast( ScalarValue::from("larger than 12 bytes string"), DataType::Utf8View, ); - - */ } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 
4f79f3fa2b220..70139aaa4a0cc 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -919,16 +919,21 @@ fn string_concat_internal_coercion( } } -/// Coercion rules for string types (Utf8/LargeUtf8): If at least one argument is -/// a string type and both arguments can be coerced into a string type, coerce -/// to string type. +/// Coercion rules for string view types (Utf8/LargeUtf8/Utf8View): +/// If at least one argument is a string view, we coerce to string view +/// based on the observation that StringArray to StringViewArray is cheap but not vice versa. +/// +/// Between Utf8 and LargeUtf8, we coerce to LargeUtf8. fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { + // If Utf8View is in any side, we coerce to Utf8View. + (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => { + Some(Utf8View) + } + // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8. + (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8), (Utf8, Utf8) => Some(Utf8), - (LargeUtf8, Utf8) => Some(LargeUtf8), - (Utf8, LargeUtf8) => Some(LargeUtf8), - (LargeUtf8, LargeUtf8) => Some(LargeUtf8), _ => None, } } @@ -975,15 +980,26 @@ fn binary_to_string_coercion( } } -/// Coercion rules for binary types (Binary/LargeBinary): If at least one argument is +/// Coercion rules for binary types (Binary/LargeBinary/BinaryView): If at least one argument is /// a binary type and both arguments can be coerced into a binary type, coerce /// to binary type. fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary), - (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary) - | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary), + // If BinaryView is in any side, we coerce to BinaryView. 
+ (BinaryView, BinaryView | Binary | LargeBinary | Utf8 | LargeUtf8 | Utf8View) + | (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, BinaryView) => { + Some(BinaryView) + } + // Prefer LargeBinary over Binary + (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, LargeBinary) + | (LargeBinary, Binary | Utf8 | LargeUtf8 | Utf8View) => Some(LargeBinary), + + // If Utf8View/LargeUtf8 presents need to be large Binary + (Utf8View | LargeUtf8, Binary) | (Binary, Utf8View | LargeUtf8) => { + Some(LargeBinary) + } + (Binary, Utf8) | (Utf8, Binary) => Some(Binary), _ => None, } } diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 9941da9dd65e0..7238dd5bbd97e 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast}; use datafusion_expr::utils::merge_schema; -use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator}; +use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan}; /// [`UnwrapCastInComparison`] attempts to remove casts from /// comparisons to literals ([`ScalarValue`]s) by applying the casts @@ -146,7 +146,7 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { }; is_supported_type(&left_type) && is_supported_type(&right_type) - && is_comparison_op(op) + && op.is_comparison_operator() } => { match (left.as_mut(), right.as_mut()) { @@ -262,18 +262,6 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { } } -fn is_comparison_op(op: &Operator) -> bool { - matches!( - op, - Operator::Eq - | Operator::NotEq - | Operator::Gt - | Operator::GtEq - | Operator::Lt - | Operator::LtEq - ) -} - /// Returns true if [UnwrapCastExprRewriter] supports this data type fn is_supported_type(data_type: &DataType) -> bool { is_supported_numeric_type(data_type) @@ -300,7 +288,10 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool { /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a string fn is_supported_string_type(data_type: &DataType) -> bool { - matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) + matches!( + data_type, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + ) } /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a dictionary @@ -473,12 +464,15 @@ fn try_cast_string_literal( target_type: &DataType, ) -> Option { let string_value = match lit_value { - ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => s.clone(), + ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) | ScalarValue::Utf8View(s) => { + s.clone() + } _ => return None, }; let scalar_value = match target_type { DataType::Utf8 => ScalarValue::Utf8(string_value), DataType::LargeUtf8 => ScalarValue::LargeUtf8(string_value), + DataType::Utf8View => ScalarValue::Utf8View(string_value), _ => return None, }; Some(scalar_value) diff --git a/datafusion/sqllogictest/test_files/binary_view.slt b/datafusion/sqllogictest/test_files/binary_view.slt new file mode 100644 index 0000000000000..de0f0bea7ffb5 --- /dev/null +++ b/datafusion/sqllogictest/test_files/binary_view.slt @@ -0,0 +1,202 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +######## +## Test setup +######## + +statement ok +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R') +; + +# Table with the different combination of column types +statement ok +CREATE TABLE test AS +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'Binary') AS column1_binary, + arrow_cast(column2, 'Binary') AS column2_binary, + arrow_cast(column1, 'LargeBinary') AS column1_large_binary, + arrow_cast(column2, 'LargeBinary') AS column2_large_binary, + arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS column1_binaryview, + arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS column2_binaryview, + arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict, + arrow_cast(column2, 'Dictionary(Int32, Binary)') AS column2_dict +FROM test_source; + +statement ok +drop table test_source + +######## +## BinaryView to BinaryView +######## + +# BinaryView scalar to BinaryView scalar + +query BBBB +SELECT + arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison1, + arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison2, + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison3, + arrow_cast(arrow_cast('Xiangpeng', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison4; +---- +false true true true + + +# BinaryView column to BinaryView column comparison as filters + +query TT +select column1_utf8, column2_utf8 from test where column1_binaryview = column2_binaryview; +---- +Xiangpeng Xiangpeng + +query TT +select column1_utf8, column2_utf8 from test where column1_binaryview <> column2_binaryview; +---- +Andrew X +Raphael R + +# BinaryView column to BinaryView column +query TTBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_binaryview, + column1_binaryview <> column2_binaryview +from test; +---- +Andrew X false true +Xiangpeng Xiangpeng true false +Raphael R false true +NULL R NULL NULL + +# BinaryView column to BinaryView scalar comparison +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binaryview, + column1_binaryview <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +######## +## 
BinaryView to Binary +######## + +# test BinaryViewArray with Binary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_binary, + column2_binary = column1_binaryview, + column1_binaryview <> column2_binary, + column2_binary <> column1_binaryview +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# test BinaryViewArray with LargeBinary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_large_binary, + column2_large_binary = column1_binaryview, + column1_binaryview <> column2_large_binary, + column2_large_binary <> column1_binaryview +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# BinaryView column to Binary scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast('Andrew', 'Binary'), + arrow_cast('Andrew', 'Binary') = column1_binaryview, + column1_binaryview <> arrow_cast('Andrew', 'Binary'), + arrow_cast('Andrew', 'Binary') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# BinaryView column to LargeBinary scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast('Andrew', 'LargeBinary'), + arrow_cast('Andrew', 'LargeBinary') = column1_binaryview, + column1_binaryview <> arrow_cast('Andrew', 'LargeBinary'), + arrow_cast('Andrew', 'LargeBinary') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# Binary column to BinaryView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binary, + column1_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binary +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +# LargeBinary column to BinaryView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_large_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_large_binary, + column1_large_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_large_binary +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +statement ok +drop table test; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt new file mode 100644 index 0000000000000..3ba4e271c2f64 --- /dev/null +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -0,0 +1,326 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +######## +## Test setup +######## + +statement ok +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R') +; + +# Table with the different combination of column types +statement ok +create table test as +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'LargeUtf8') as column1_large_utf8, + arrow_cast(column2, 'LargeUtf8') as column2_large_utf8, + arrow_cast(column1, 'Utf8View') as column1_utf8view, + arrow_cast(column2, 'Utf8View') as column2_utf8view, + arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict, + arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict +FROM test_source; + +statement ok +drop table test_source + +######## +## StringView to StringView +######## + +# StringView scalar to StringView scalar + +query BBBB +select + arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); +---- +false true true true + + +# StringView column to StringView column comparison as filters + +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view = column2_utf8view; +---- +Xiangpeng Xiangpeng + +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view <> column2_utf8view; +---- +Andrew X +Raphael R + +# StringView column to StringView column +query TTBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8view, + column1_utf8view <> column2_utf8view +from test; +---- +Andrew X false true +Xiangpeng Xiangpeng true false +Raphael R false true +NULL R NULL NULL + +# StringView column to StringView scalar comparison +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +######## +## StringView to String +######## + +# test StringViewArray with Utf8 columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8, + column2_utf8 = column1_utf8view, + column1_utf8view <> column2_utf8, + column2_utf8 <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# test StringViewArray with LargeUtf8 columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_large_utf8, + column2_large_utf8 = column1_utf8view, + column1_utf8view <> column2_large_utf8, + 
column2_large_utf8 <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +# StringView column to String scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# StringView column to LargeString scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'LargeUtf8'), + arrow_cast('Andrew', 'LargeUtf8') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'LargeUtf8'), + arrow_cast('Andrew', 'LargeUtf8') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# String column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8 = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8, + column1_utf8 <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8 +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# LargeString column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_large_utf8 = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_large_utf8, + column1_large_utf8 <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8 +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +######## +## StringView to Dictionary +######## + +# test StringViewArray with Dictionary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_dict, + column2_dict = column1_utf8view, + column1_utf8view <> column2_dict, + column2_dict <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# StringView column to Dict scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# Dict column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_dict = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_dict, + column1_dict <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_dict +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +######## +## Coercion Rules +######## + + +statement ok +set 
datafusion.explain.logical_plan_only = true; + + +# Filter should have a StringView literal and no column cast +query TT +explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew'; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +# reverse order should be the same +query TT +explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +query TT +explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View'); +---- +logical_plan +01)Filter: test.column1_utf8 = Utf8("Andrew") +02)--TableScan: test projection=[column1_utf8] + +query TT +explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = column1_utf8; +---- +logical_plan +01)Filter: test.column1_utf8 = Utf8("Andrew") +02)--TableScan: test projection=[column1_utf8] + +query TT +explain SELECT column1_utf8 from test where column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'); +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +query TT +explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +# compare string / stringview +# Should cast string -> stringview (which is cheap), not stringview -> string (which is not) +query TT +explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View) +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + +query TT +explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + + +statement ok +drop table test; From 0c39b4d2ffefcd1e0e77389e493291aaa315d628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 17 Jul 2024 04:33:12 +0800 Subject: [PATCH 062/357] Replace to_lowercase with to_string in sql exmaple (#11486) --- datafusion/sql/examples/sql.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index 1b92a7e116b16..b724afabaf097 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -69,8 +69,7 @@ struct MyContextProvider { impl MyContextProvider { fn with_udaf(mut self, udaf: Arc) -> Self { - // TODO: change to to_string() if all the function name is converted to lowercase - self.udafs.insert(udaf.name().to_lowercase(), udaf); + self.udafs.insert(udaf.name().to_string(), udaf); self } From 169a0d338cd1b3247da199a03add2119b5289d61 Mon Sep 17 00:00:00 2001 From: Arttu Date: Tue, 16 Jul 2024 22:33:28 +0200 Subject: [PATCH 063/357] chore: switch to using proper Substrait types for IntervalYearMonth and IntervalDayTime 
(#11471) also clean up IntervalMonthDayNano type - the type itself needs no parameters --- .../substrait/src/logical_plan/consumer.rs | 46 ++++-- .../substrait/src/logical_plan/producer.rs | 140 ++++++------------ 2 files changed, 79 insertions(+), 107 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index a4f7242024754..991aa61fbf159 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -65,7 +65,7 @@ use std::str::FromStr; use std::sync::Arc; use substrait::proto::exchange_rel::ExchangeKind; use substrait::proto::expression::literal::user_defined::Val; -use substrait::proto::expression::literal::IntervalDayToSecond; +use substrait::proto::expression::literal::{IntervalDayToSecond, IntervalYearToMonth}; use substrait::proto::expression::subquery::SubqueryType; use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction}; use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile; @@ -1414,7 +1414,7 @@ fn from_substrait_type( })?; let field = Arc::new(Field::new_list_field( from_substrait_type(inner_type, dfs_names, name_idx)?, - // We ignore Substrait's nullability here to match to_substrait_literal + // We ignore Substrait's nullability here to match to_substrait_literal // which always creates nullable lists true, )); @@ -1445,12 +1445,15 @@ fn from_substrait_type( )); match map.type_variation_reference { DEFAULT_CONTAINER_TYPE_VARIATION_REF => { - Ok(DataType::Map(Arc::new(Field::new_struct( - "entries", - [key_field, value_field], - false, // The inner map field is always non-nullable (Arrow #1697), - )), false)) - }, + Ok(DataType::Map( + Arc::new(Field::new_struct( + "entries", + [key_field, value_field], + false, // The inner map field is always non-nullable (Arrow #1697), + )), + false, + )) + } v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" )?, @@ -1467,14 +1470,33 @@ fn from_substrait_type( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, + r#type::Kind::IntervalYear(i) => match i.type_variation_reference { + DEFAULT_TYPE_VARIATION_REF => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), + }, + r#type::Kind::IntervalDay(i) => match i.type_variation_reference { + DEFAULT_TYPE_VARIATION_REF => { + Ok(DataType::Interval(IntervalUnit::DayTime)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), + }, r#type::Kind::UserDefined(u) => { match u.type_reference { + // Kept for backwards compatibility, use IntervalYear instead INTERVAL_YEAR_MONTH_TYPE_REF => { Ok(DataType::Interval(IntervalUnit::YearMonth)) } + // Kept for backwards compatibility, use IntervalDay instead INTERVAL_DAY_TIME_TYPE_REF => { Ok(DataType::Interval(IntervalUnit::DayTime)) } + // Not supported yet by Substrait INTERVAL_MONTH_DAY_NANO_TYPE_REF => { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } @@ -1484,7 +1506,7 @@ fn from_substrait_type( u.type_variation_reference ), } - }, + } r#type::Kind::Struct(s) => Ok(DataType::Struct(from_substrait_struct_type( s, dfs_names, name_idx, )?)), @@ -1753,11 +1775,16 @@ fn from_substrait_literal( seconds, microseconds, })) => { + // DF only supports millisecond precision, so we lose the micros here ScalarValue::new_interval_dt(*days, (seconds * 1000) + (microseconds / 1000)) } + 
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => { + ScalarValue::new_interval_ym(*years, *months) + } Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())), Some(LiteralType::UserDefined(user_defined)) => { match user_defined.type_reference { + // Kept for backwards compatibility, use IntervalYearToMonth instead INTERVAL_YEAR_MONTH_TYPE_REF => { let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { return substrait_err!("Interval year month value is empty"); @@ -1770,6 +1797,7 @@ fn from_substrait_literal( })?; ScalarValue::IntervalYearMonth(Some(i32::from_le_bytes(value_slice))) } + // Kept for backwards compatibility, use IntervalDayToSecond instead INTERVAL_DAY_TIME_TYPE_REF => { let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { return substrait_err!("Interval day time value is empty"); diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 8d039a0502494..7849d0bd431e6 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -48,12 +48,11 @@ use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Opera use datafusion::prelude::Expr; use pbjson_types::Any as ProtoAny; use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields}; -use substrait::proto::expression::literal::user_defined::Val; -use substrait::proto::expression::literal::UserDefined; -use substrait::proto::expression::literal::{List, Struct}; +use substrait::proto::expression::literal::{ + user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Struct, UserDefined, +}; use substrait::proto::expression::subquery::InPredicate; use substrait::proto::expression::window_function::BoundsType; -use substrait::proto::r#type::{parameter, Parameter}; use substrait::proto::read_rel::VirtualTable; use substrait::proto::{CrossRel, ExchangeRel}; use substrait::{ @@ -95,9 +94,7 @@ use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, - INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_DAY_TIME_TYPE_URL, INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_URL, - INTERVAL_YEAR_MONTH_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_URL, LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, @@ -1534,47 +1531,31 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result { - // define two type parameters for convenience - let i32_param = Parameter { - parameter: Some(parameter::Parameter::DataType(substrait::proto::Type { - kind: Some(r#type::Kind::I32(r#type::I32 { + match interval_unit { + IntervalUnit::YearMonth => Ok(substrait::proto::Type { + kind: Some(r#type::Kind::IntervalYear(r#type::IntervalYear { type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability: r#type::Nullability::Unspecified as i32, + nullability, })), - })), - }; - let i64_param = Parameter { - parameter: Some(parameter::Parameter::DataType(substrait::proto::Type { - kind: Some(r#type::Kind::I64(r#type::I64 { + }), + IntervalUnit::DayTime => Ok(substrait::proto::Type { + kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay { type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability: 
r#type::Nullability::Unspecified as i32, + nullability, })), - })), - }; - - let (type_parameters, type_reference) = match interval_unit { - IntervalUnit::YearMonth => { - let type_parameters = vec![i32_param]; - (type_parameters, INTERVAL_YEAR_MONTH_TYPE_REF) - } - IntervalUnit::DayTime => { - let type_parameters = vec![i64_param]; - (type_parameters, INTERVAL_DAY_TIME_TYPE_REF) - } + }), IntervalUnit::MonthDayNano => { - // use 2 `i64` as `i128` - let type_parameters = vec![i64_param.clone(), i64_param]; - (type_parameters, INTERVAL_MONTH_DAY_NANO_TYPE_REF) + // Substrait doesn't currently support this type, so we represent it as a UDT + Ok(substrait::proto::Type { + kind: Some(r#type::Kind::UserDefined(r#type::UserDefined { + type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + nullability, + type_parameters: vec![], + })), + }) } - }; - Ok(substrait::proto::Type { - kind: Some(r#type::Kind::UserDefined(r#type::UserDefined { - type_reference, - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability, - type_parameters, - })), - }) + } } DataType::Binary => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Binary(r#type::Binary { @@ -1954,45 +1935,23 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF) } // Date64 literal is not supported in Substrait - ScalarValue::IntervalYearMonth(Some(i)) => { - let bytes = i.to_le_bytes(); - ( - LiteralType::UserDefined(UserDefined { - type_reference: INTERVAL_YEAR_MONTH_TYPE_REF, - type_parameters: vec![Parameter { - parameter: Some(parameter::Parameter::DataType( - substrait::proto::Type { - kind: Some(r#type::Kind::I32(r#type::I32 { - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability: r#type::Nullability::Required as i32, - })), - }, - )), - }], - val: Some(Val::Value(ProtoAny { - type_url: INTERVAL_YEAR_MONTH_TYPE_URL.to_string(), - value: bytes.to_vec().into(), - })), - }), - INTERVAL_YEAR_MONTH_TYPE_REF, - ) - } + ScalarValue::IntervalYearMonth(Some(i)) => ( + LiteralType::IntervalYearToMonth(IntervalYearToMonth { + // DF only tracks total months, but there should always be 12 months in a year + years: *i / 12, + months: *i % 12, + }), + DEFAULT_TYPE_VARIATION_REF, + ), ScalarValue::IntervalMonthDayNano(Some(i)) => { - // treat `i128` as two contiguous `i64` + // IntervalMonthDayNano is internally represented as a 128-bit integer, containing + // months (32bit), days (32bit), and nanoseconds (64bit) let bytes = i.to_byte_slice(); - let i64_param = Parameter { - parameter: Some(parameter::Parameter::DataType(substrait::proto::Type { - kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability: r#type::Nullability::Required as i32, - })), - })), - }; ( LiteralType::UserDefined(UserDefined { type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF, - type_parameters: vec![i64_param.clone(), i64_param], - val: Some(Val::Value(ProtoAny { + type_parameters: vec![], + val: Some(user_defined::Val::Value(ProtoAny { type_url: INTERVAL_MONTH_DAY_NANO_TYPE_URL.to_string(), value: bytes.to_vec().into(), })), @@ -2000,29 +1959,14 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { INTERVAL_MONTH_DAY_NANO_TYPE_REF, ) } - ScalarValue::IntervalDayTime(Some(i)) => { - let bytes = i.to_byte_slice(); - ( - LiteralType::UserDefined(UserDefined { - type_reference: INTERVAL_DAY_TIME_TYPE_REF, - type_parameters: vec![Parameter { - parameter: 
Some(parameter::Parameter::DataType( - substrait::proto::Type { - kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, - nullability: r#type::Nullability::Required as i32, - })), - }, - )), - }], - val: Some(Val::Value(ProtoAny { - type_url: INTERVAL_DAY_TIME_TYPE_URL.to_string(), - value: bytes.to_vec().into(), - })), - }), - INTERVAL_DAY_TIME_TYPE_REF, - ) - } + ScalarValue::IntervalDayTime(Some(i)) => ( + LiteralType::IntervalDayToSecond(IntervalDayToSecond { + days: i.days, + seconds: i.milliseconds / 1000, + microseconds: (i.milliseconds % 1000) * 1000, + }), + DEFAULT_TYPE_VARIATION_REF, + ), ScalarValue::Binary(Some(b)) => ( LiteralType::Binary(b.clone()), DEFAULT_CONTAINER_TYPE_VARIATION_REF, From 82fd6a7de310fef4e365c333b0f7fc2a3e4ed12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:34:01 +0300 Subject: [PATCH 064/357] Move execute_input_stream (#11449) --- datafusion/physical-plan/src/insert.rs | 77 +++----------------- datafusion/physical-plan/src/lib.rs | 97 +++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 71 deletions(-) diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 1c21991d93c55..5cd864125e29c 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -23,8 +23,8 @@ use std::fmt::Debug; use std::sync::Arc; use super::{ - DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, - PlanProperties, SendableRecordBatchStream, + execute_input_stream, DisplayAs, DisplayFormatType, ExecutionPlan, + ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, }; use crate::metrics::MetricsSet; use crate::stream::RecordBatchStreamAdapter; @@ -33,7 +33,7 @@ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow_array::{ArrayRef, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; -use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{ Distribution, EquivalenceProperties, PhysicalSortRequirement, @@ -120,46 +120,6 @@ impl DataSinkExec { } } - fn execute_input_stream( - &self, - partition: usize, - context: Arc, - ) -> Result { - let input_stream = self.input.execute(partition, context)?; - - debug_assert_eq!( - self.sink_schema.fields().len(), - self.input.schema().fields().len() - ); - - // Find input columns that may violate the not null constraint. 
- let risky_columns: Vec<_> = self - .sink_schema - .fields() - .iter() - .zip(self.input.schema().fields().iter()) - .enumerate() - .filter_map(|(i, (sink_field, input_field))| { - if !sink_field.is_nullable() && input_field.is_nullable() { - Some(i) - } else { - None - } - }) - .collect(); - - if risky_columns.is_empty() { - Ok(input_stream) - } else { - // Check not null constraint on the input stream - Ok(Box::pin(RecordBatchStreamAdapter::new( - Arc::clone(&self.sink_schema), - input_stream - .map(move |batch| check_not_null_contraits(batch?, &risky_columns)), - ))) - } - } - /// Input execution plan pub fn input(&self) -> &Arc { &self.input @@ -269,7 +229,12 @@ impl ExecutionPlan for DataSinkExec { if partition != 0 { return internal_err!("DataSinkExec can only be called on partition 0!"); } - let data = self.execute_input_stream(0, Arc::clone(&context))?; + let data = execute_input_stream( + Arc::clone(&self.input), + Arc::clone(&self.sink_schema), + 0, + Arc::clone(&context), + )?; let count_schema = Arc::clone(&self.count_schema); let sink = Arc::clone(&self.sink); @@ -314,27 +279,3 @@ fn make_count_schema() -> SchemaRef { false, )])) } - -fn check_not_null_contraits( - batch: RecordBatch, - column_indices: &Vec, -) -> Result { - for &index in column_indices { - if batch.num_columns() <= index { - return exec_err!( - "Invalid batch column count {} expected > {}", - batch.num_columns(), - index - ); - } - - if batch.column(index).null_count() > 0 { - return exec_err!( - "Invalid batch column at '{}' has null but schema specifies non-nullable", - index - ); - } - } - - Ok(batch) -} diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index f3a709ff76703..dc736993a4533 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -36,13 +36,13 @@ use arrow::datatypes::SchemaRef; use arrow::ipc::reader::FileReader; use arrow::record_batch::RecordBatch; use datafusion_common::config::ConfigOptions; -use datafusion_common::{exec_datafusion_err, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{ EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, }; -use futures::stream::TryStreamExt; +use futures::stream::{StreamExt, TryStreamExt}; use log::debug; use tokio::sync::mpsc::Sender; use tokio::task::JoinSet; @@ -97,7 +97,7 @@ pub use datafusion_physical_expr::{ // Backwards compatibility use crate::common::IPCWriter; pub use crate::stream::EmptyRecordBatchStream; -use crate::stream::RecordBatchReceiverStream; +use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::human_readable_size; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; @@ -805,6 +805,97 @@ pub fn execute_stream_partitioned( Ok(streams) } +/// Executes an input stream and ensures that the resulting stream adheres to +/// the `not null` constraints specified in the `sink_schema`. +/// +/// # Arguments +/// +/// * `input` - An execution plan +/// * `sink_schema` - The schema to be applied to the output stream +/// * `partition` - The partition index to be executed +/// * `context` - The task context +/// +/// # Returns +/// +/// * `Result` - A stream of `RecordBatch`es if successful +/// +/// This function first executes the given input plan for the specified partition +/// and context. 
It then checks if there are any columns in the input that might +/// violate the `not null` constraints specified in the `sink_schema`. If there are +/// such columns, it wraps the resulting stream to enforce the `not null` constraints +/// by invoking the `check_not_null_contraits` function on each batch of the stream. +pub fn execute_input_stream( + input: Arc, + sink_schema: SchemaRef, + partition: usize, + context: Arc, +) -> Result { + let input_stream = input.execute(partition, context)?; + + debug_assert_eq!(sink_schema.fields().len(), input.schema().fields().len()); + + // Find input columns that may violate the not null constraint. + let risky_columns: Vec<_> = sink_schema + .fields() + .iter() + .zip(input.schema().fields().iter()) + .enumerate() + .filter_map(|(idx, (sink_field, input_field))| { + (!sink_field.is_nullable() && input_field.is_nullable()).then_some(idx) + }) + .collect(); + + if risky_columns.is_empty() { + Ok(input_stream) + } else { + // Check not null constraint on the input stream + Ok(Box::pin(RecordBatchStreamAdapter::new( + sink_schema, + input_stream + .map(move |batch| check_not_null_contraits(batch?, &risky_columns)), + ))) + } +} + +/// Checks a `RecordBatch` for `not null` constraints on specified columns. +/// +/// # Arguments +/// +/// * `batch` - The `RecordBatch` to be checked +/// * `column_indices` - A vector of column indices that should be checked for +/// `not null` constraints. +/// +/// # Returns +/// +/// * `Result` - The original `RecordBatch` if all constraints are met +/// +/// This function iterates over the specified column indices and ensures that none +/// of the columns contain null values. If any column contains null values, an error +/// is returned. +pub fn check_not_null_contraits( + batch: RecordBatch, + column_indices: &Vec, +) -> Result { + for &index in column_indices { + if batch.num_columns() <= index { + return exec_err!( + "Invalid batch column count {} expected > {}", + batch.num_columns(), + index + ); + } + + if batch.column(index).null_count() > 0 { + return exec_err!( + "Invalid batch column at '{}' has null but schema specifies non-nullable", + index + ); + } + } + + Ok(batch) +} + /// Utility function yielding a string representation of the given [`ExecutionPlan`]. 
pub fn get_plan_string(plan: &Arc) -> Vec { let formatted = displayable(plan.as_ref()).indent(true).to_string(); From c54a638585715410fefbe07fd23552e3871bd4f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 17 Jul 2024 04:35:12 +0800 Subject: [PATCH 065/357] Enable clone_on_ref_ptr clippy lints on proto (#11465) --- datafusion/proto-common/src/from_proto/mod.rs | 2 +- datafusion/proto-common/src/lib.rs | 2 ++ datafusion/proto/src/lib.rs | 2 ++ .../proto/src/physical_plan/from_proto.rs | 2 +- datafusion/proto/src/physical_plan/mod.rs | 18 +++++++++--------- datafusion/proto/src/physical_plan/to_proto.rs | 12 ++++++------ 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index df673de4e1191..52ca5781dc963 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -448,7 +448,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { None, &message.version(), )?; - Ok(record_batch.column(0).clone()) + Ok(Arc::clone(record_batch.column(0))) } _ => Err(Error::General("dictionary id not found in schema while deserializing ScalarValue::List".to_string())), }?; diff --git a/datafusion/proto-common/src/lib.rs b/datafusion/proto-common/src/lib.rs index 474db652df992..91e3939154424 100644 --- a/datafusion/proto-common/src/lib.rs +++ b/datafusion/proto-common/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! Serialize / Deserialize DataFusion Primitive Types to bytes //! diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs index 57a1236ba8f4f..bac31850c875b 100644 --- a/datafusion/proto/src/lib.rs +++ b/datafusion/proto/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! Serialize / Deserialize DataFusion Plans to bytes //! 
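The hunks that follow in this patch are the mechanical fallout of the new `#![deny(clippy::clone_on_ref_ptr)]` attribute: every `.clone()` on an `Arc` (or an `Arc`-backed alias such as `SchemaRef`) becomes an explicit `Arc::clone(..)`. A minimal stand-alone sketch of the idiom the lint enforces — illustrative only, not taken from this patch:

    use std::sync::Arc;

    fn main() {
        let schema_name: Arc<String> = Arc::new("column1_utf8".to_string());

        // Rejected under `clippy::clone_on_ref_ptr`: reads as if the underlying
        // data were being deep-copied.
        // let copy = schema_name.clone();

        // Preferred form: makes it explicit that only the reference count is bumped.
        let copy = Arc::clone(&schema_name);

        assert_eq!(Arc::strong_count(&schema_name), 2);
        assert_eq!(*copy, *schema_name);
    }

Either form compiles to the same reference-count increment; the lint only changes how the intent reads at the call site, which is why the remaining hunks are pure find-and-replace.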
diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 52fbd5cbdcf64..b7311c694d4c9 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -365,7 +365,7 @@ pub fn parse_physical_expr( Some(buf) => codec.try_decode_udf(&e.name, buf)?, None => registry.udf(e.name.as_str())?, }; - let scalar_fun_def = udf.clone(); + let scalar_fun_def = Arc::clone(&udf); let args = parse_physical_exprs(&e.args, registry, input_schema, codec)?; diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index e5429945e97ef..948a39bfe0be7 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -1101,7 +1101,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { where Self: Sized, { - let plan_clone = plan.clone(); + let plan_clone = Arc::clone(&plan); let plan = plan.as_any(); if let Some(exec) = plan.downcast_ref::() { @@ -1128,7 +1128,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let expr = exec .expr() .iter() - .map(|expr| serialize_physical_expr(expr.0.clone(), extension_codec)) + .map(|expr| serialize_physical_expr(Arc::clone(&expr.0), extension_codec)) .collect::>>()?; let expr_name = exec.expr().iter().map(|expr| expr.1.clone()).collect(); return Ok(protobuf::PhysicalPlanNode { @@ -1169,7 +1169,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { protobuf::FilterExecNode { input: Some(Box::new(input)), expr: Some(serialize_physical_expr( - exec.predicate().clone(), + Arc::clone(exec.predicate()), extension_codec, )?), default_filter_selectivity: exec.default_selectivity() as u32, @@ -1585,7 +1585,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { if let Some(exec) = plan.downcast_ref::() { let predicate = exec .predicate() - .map(|pred| serialize_physical_expr(pred.clone(), extension_codec)) + .map(|pred| serialize_physical_expr(Arc::clone(pred), extension_codec)) .transpose()?; return Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::ParquetScan( @@ -1810,13 +1810,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let window_expr = exec .window_expr() .iter() - .map(|e| serialize_physical_window_expr(e.clone(), extension_codec)) + .map(|e| serialize_physical_window_expr(Arc::clone(e), extension_codec)) .collect::>>()?; let partition_keys = exec .partition_keys .iter() - .map(|e| serialize_physical_expr(e.clone(), extension_codec)) + .map(|e| serialize_physical_expr(Arc::clone(e), extension_codec)) .collect::>>()?; return Ok(protobuf::PhysicalPlanNode { @@ -1840,13 +1840,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let window_expr = exec .window_expr() .iter() - .map(|e| serialize_physical_window_expr(e.clone(), extension_codec)) + .map(|e| serialize_physical_window_expr(Arc::clone(e), extension_codec)) .collect::>>()?; let partition_keys = exec .partition_keys .iter() - .map(|e| serialize_physical_expr(e.clone(), extension_codec)) + .map(|e| serialize_physical_expr(Arc::clone(e), extension_codec)) .collect::>>()?; let input_order_mode = match &exec.input_order_mode { @@ -1949,7 +1949,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { } let mut buf: Vec = vec![]; - match extension_codec.try_encode(plan_clone.clone(), &mut buf) { + match extension_codec.try_encode(Arc::clone(&plan_clone), &mut buf) { Ok(_) => { let inputs: Vec = plan_clone .children() diff --git 
a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 9c95acc1dcf47..d8d0291e1ca52 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -323,11 +323,11 @@ pub fn serialize_physical_expr( } else if let Some(expr) = expr.downcast_ref::() { let binary_expr = Box::new(protobuf::PhysicalBinaryExprNode { l: Some(Box::new(serialize_physical_expr( - expr.left().clone(), + Arc::clone(expr.left()), codec, )?)), r: Some(Box::new(serialize_physical_expr( - expr.right().clone(), + Arc::clone(expr.right()), codec, )?)), op: format!("{:?}", expr.op()), @@ -347,7 +347,7 @@ pub fn serialize_physical_expr( expr: expr .expr() .map(|exp| { - serialize_physical_expr(exp.clone(), codec) + serialize_physical_expr(Arc::clone(exp), codec) .map(Box::new) }) .transpose()?, @@ -364,7 +364,7 @@ pub fn serialize_physical_expr( else_expr: expr .else_expr() .map(|a| { - serialize_physical_expr(a.clone(), codec) + serialize_physical_expr(Arc::clone(a), codec) .map(Box::new) }) .transpose()?, @@ -552,8 +552,8 @@ fn serialize_when_then_expr( codec: &dyn PhysicalExtensionCodec, ) -> Result { Ok(protobuf::PhysicalWhenThen { - when_expr: Some(serialize_physical_expr(when_expr.clone(), codec)?), - then_expr: Some(serialize_physical_expr(then_expr.clone(), codec)?), + when_expr: Some(serialize_physical_expr(Arc::clone(when_expr), codec)?), + then_expr: Some(serialize_physical_expr(Arc::clone(then_expr), codec)?), }) } From 382bf4f3c7a730828684b9e4ce01369b89717e19 Mon Sep 17 00:00:00 2001 From: Mohamed Abdeen <83442793+MohamedAbdeen21@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:41:20 +0300 Subject: [PATCH 066/357] upgrade sqlparser 0.47 -> 0.48 (#11453) * upgrade sqlparser 0.47 -> 0.48 * clean imports and qualified imports * update df-cli cargo lock * fix trailing commas in slt tests * update slt tests results * restore rowsort in slt tests * fix slt tests * rerun CI * reset unchanged slt files * Revert "clean imports and qualified imports" This reverts commit 7be2263793be7730615c52fec79ca3397eefb40f. * update non-windows systems stack size * update windows stack size * remove windows-only unused import * use same test main for all systems * Reapply "clean imports and qualified imports" This reverts commit 4fc036a9112528ec96926df93b1301465829bbcc. 
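The diffs below adapt DataFusion to several sqlparser 0.48 AST changes visible in the hunks: `Statement::CreateTable` now wraps a dedicated `ast::CreateTable` struct, `GroupByExpr::Expressions` gained an extra (here ignored) second field, `UPDATE` assignments go through `AssignmentTarget`, and parser errors read `Expected: ..., found: ...`. A small stand-alone sketch of matching the new `CreateTable` variant — the `GenericDialect`/`Parser::parse_sql` entry points are standard sqlparser API, not taken from this patch:

    use sqlparser::ast::{CreateTable, Statement};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "CREATE TABLE t (c1 INT)";
        let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();

        // 0.48 wraps the CREATE TABLE payload in a dedicated `CreateTable` struct,
        // so the arm destructures `Statement::CreateTable(CreateTable { .. })`
        // instead of the old inline-struct variant removed in the hunks below.
        match &statements[0] {
            Statement::CreateTable(CreateTable { name, columns, .. }) => {
                println!("table {name} with {} column(s)", columns.len());
            }
            other => println!("unexpected statement: {other}"),
        }
    }
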
--- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 4 +- datafusion/sql/src/expr/function.rs | 1 + datafusion/sql/src/parser.rs | 18 +++--- datafusion/sql/src/planner.rs | 21 +++++++ datafusion/sql/src/select.rs | 2 +- datafusion/sql/src/statement.rs | 60 ++++++++++--------- datafusion/sql/src/unparser/ast.rs | 5 +- datafusion/sql/src/unparser/expr.rs | 3 + datafusion/sql/src/unparser/plan.rs | 1 + datafusion/sql/tests/sql_integration.rs | 6 +- datafusion/sqllogictest/bin/sqllogictests.rs | 30 +++------- .../sqllogictest/test_files/aggregate.slt | 2 +- .../sqllogictest/test_files/arrow_typeof.slt | 2 +- .../sqllogictest/test_files/coalesce.slt | 2 +- datafusion/sqllogictest/test_files/copy.slt | 4 +- .../test_files/create_external_table.slt | 14 ++--- .../sqllogictest/test_files/csv_files.slt | 2 +- .../sqllogictest/test_files/encoding.slt | 2 +- datafusion/sqllogictest/test_files/expr.slt | 2 +- .../sqllogictest/test_files/group_by.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 5 +- datafusion/sqllogictest/test_files/math.slt | 6 +- datafusion/sqllogictest/test_files/misc.slt | 2 +- .../sqllogictest/test_files/predicates.slt | 4 +- datafusion/sqllogictest/test_files/scalar.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 8 +-- .../sqllogictest/test_files/strings.slt | 2 +- datafusion/sqllogictest/test_files/struct.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 4 +- datafusion/sqllogictest/test_files/unnest.slt | 6 +- datafusion/sqllogictest/test_files/window.slt | 2 +- 32 files changed, 123 insertions(+), 105 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6dd434abc87c9..f61ed7e58fe37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -123,7 +123,7 @@ rand = "0.8" regex = "1.8" rstest = "0.21.0" serde_json = "1" -sqlparser = { version = "0.47", features = ["visitor"] } +sqlparser = { version = "0.48", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 7da9cc427c37d..e48c6b081e1a5 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3438,9 +3438,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" +checksum = "749780d15ad1ee15fd74f5f84b0665560b6abb913de744c2b69155770f9601da" dependencies = [ "log", "sqlparser_derive", diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index d9ddf57eb192c..dab328cc49080 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -109,6 +109,7 @@ impl FunctionArgs { filter, mut null_treatment, within_group, + .. 
} = function; // Handle no argument form (aka `current_time` as opposed to `current_time()`) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 8147092c34aba..bc13484235c39 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -1006,14 +1006,15 @@ mod tests { expect_parse_ok(sql, expected)?; // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens - let sqls = vec![ - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + let sqls = + vec![ + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.compression' 'GZIP')", "GZIP"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.compression' 'BZIP2')", "BZIP2"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.compression' 'XZ')", "XZ"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.compression' 'ZSTD')", "ZSTD"), ]; for (sql, compression) in sqls { @@ -1123,7 +1124,10 @@ mod tests { // negative case: mixed column defs and column names in `PARTITIONED BY` clause let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; - expect_parse_error(sql, "sql parser error: Expected a data type name, found: )"); + expect_parse_error( + sql, + "sql parser error: Expected: a data type name, found: )", + ); // negative case: mixed column defs and column names in `PARTITIONED BY` clause let sql = @@ -1291,7 +1295,7 @@ mod tests { LOCATION 'foo.parquet' OPTIONS ('format.compression' 'zstd', 'format.delimiter' '*', - 'ROW_GROUP_SIZE' '1024', + 'ROW_GROUP_SIZE' '1024', 'TRUNCATE' 'NO', 'format.has_header' 'true')"; let expected = Statement::CreateExternalTable(CreateExternalTable { diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index a77f0003f7380..be04f51f4f2c9 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -468,6 +468,27 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Float64 | SQLDataType::JSONB | SQLDataType::Unspecified + // Clickhouse datatypes + | SQLDataType::Int16 + | SQLDataType::Int32 + | SQLDataType::Int128 + | SQLDataType::Int256 + | SQLDataType::UInt8 + | SQLDataType::UInt16 + | SQLDataType::UInt32 + | SQLDataType::UInt64 + | SQLDataType::UInt128 + | SQLDataType::UInt256 + | SQLDataType::Float32 + | SQLDataType::Date32 + | SQLDataType::Datetime64(_, _) + | SQLDataType::FixedString(_) + | SQLDataType::Map(_, _) + | SQLDataType::Tuple(_) + | SQLDataType::Nested(_) + | SQLDataType::Union(_) + | SQLDataType::Nullable(_) + | SQLDataType::LowCardinality(_) => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 236403e83d74e..a5891e655a052 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -149,7 +149,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let aggr_exprs = find_aggregate_exprs(&aggr_expr_haystack); // All of the group by expressions - let group_by_exprs = if let GroupByExpr::Expressions(exprs) = select.group_by { + let group_by_exprs = if let GroupByExpr::Expressions(exprs, _) = select.group_by { exprs .into_iter() .map(|e| { diff --git 
a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 1acfac79acc0b..6df25086305d2 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -46,18 +46,19 @@ use datafusion_expr::{ cast, col, Analyze, CreateCatalog, CreateCatalogSchema, CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody, CreateMemoryTable, CreateView, DescribeTable, DmlStatement, DropCatalogSchema, - DropFunction, DropTable, DropView, EmptyRelation, Explain, ExprSchemable, Filter, - LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, SetVariable, - Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, + DropFunction, DropTable, DropView, EmptyRelation, Explain, Expr, ExprSchemable, + Filter, LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, + SetVariable, Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; use sqlparser::ast; use sqlparser::ast::{ - Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, - Expr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, - SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, - TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value, + Assignment, AssignmentTarget, ColumnDef, CreateTable, CreateTableOptions, Delete, + DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, + OneOrManyWithParens, Query, SchemaName, SetExpr, ShowCreateObject, + ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -240,7 +241,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { value, } => self.set_variable_to_plan(local, hivevar, &variables, value), - Statement::CreateTable { + Statement::CreateTable(CreateTable { query, name, columns, @@ -250,7 +251,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if_not_exists, or_replace, .. - } if table_properties.is_empty() && with_options.is_empty() => { + }) if table_properties.is_empty() && with_options.is_empty() => { // Merge inline constraints and existing constraints let mut all_constraints = constraints; let inline_constraints = calc_inline_constraints_from_columns(&columns); @@ -954,7 +955,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { order_exprs: Vec, schema: &DFSchemaRef, planner_context: &mut PlannerContext, - ) -> Result>> { + ) -> Result>> { // Ask user to provide a schema if schema is empty. 
if !order_exprs.is_empty() && schema.fields().is_empty() { return plan_err!( @@ -1159,7 +1160,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { local: bool, hivevar: bool, variables: &OneOrManyWithParens, - value: Vec, + value: Vec, ) -> Result { if local { return not_impl_err!("LOCAL is not supported"); @@ -1218,7 +1219,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn delete_to_plan( &self, table_name: ObjectName, - predicate_expr: Option, + predicate_expr: Option, ) -> Result { // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(table_name.clone())?; @@ -1264,7 +1265,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { table: TableWithJoins, assignments: Vec, from: Option, - predicate_expr: Option, + predicate_expr: Option, ) -> Result { let (table_name, table_alias) = match &table.relation { TableFactor::Table { name, alias, .. } => (name.clone(), alias.clone()), @@ -1284,8 +1285,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut assign_map = assignments .iter() .map(|assign| { - let col_name: &Ident = assign - .id + let cols = match &assign.target { + AssignmentTarget::ColumnName(cols) => cols, + _ => plan_err!("Tuples are not supported")?, + }; + let col_name: &Ident = cols + .0 .iter() .last() .ok_or_else(|| plan_datafusion_err!("Empty column id"))?; @@ -1293,7 +1298,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { table_schema.field_with_unqualified_name(&col_name.value)?; Ok((col_name.value.clone(), assign.value.clone())) }) - .collect::>>()?; + .collect::>>()?; // Build scan, join with from table if it exists. let mut input_tables = vec![table]; @@ -1332,8 +1337,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &mut planner_context, )?; // Update placeholder's datatype to the type of the target column - if let datafusion_expr::Expr::Placeholder(placeholder) = &mut expr - { + if let Expr::Placeholder(placeholder) = &mut expr { placeholder.data_type = placeholder .data_type .take() @@ -1345,14 +1349,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { None => { // If the target table has an alias, use it to qualify the column name if let Some(alias) = &table_alias { - datafusion_expr::Expr::Column(Column::new( + Expr::Column(Column::new( Some(self.normalizer.normalize(alias.name.clone())), field.name(), )) } else { - datafusion_expr::Expr::Column(Column::from(( - qualifier, field, - ))) + Expr::Column(Column::from((qualifier, field))) } } }; @@ -1427,7 +1429,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if let SetExpr::Values(ast::Values { rows, .. }) = (*source.body).clone() { for row in rows.iter() { for (idx, val) in row.iter().enumerate() { - if let ast::Expr::Value(Value::Placeholder(name)) = val { + if let SQLExpr::Value(Value::Placeholder(name)) = val { let name = name.replace('$', "").parse::().map_err(|_| { plan_datafusion_err!("Can't parse placeholder: {name}") @@ -1460,23 +1462,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .map(|(i, value_index)| { let target_field = table_schema.field(i); let expr = match value_index { - Some(v) => datafusion_expr::Expr::Column(Column::from( - source.schema().qualified_field(v), - )) - .cast_to(target_field.data_type(), source.schema())?, + Some(v) => { + Expr::Column(Column::from(source.schema().qualified_field(v))) + .cast_to(target_field.data_type(), source.schema())? + } // The value is not specified. Fill in the default value for the column. 
None => table_source .get_column_default(target_field.name()) .cloned() .unwrap_or_else(|| { // If there is no default for the column, then the default is NULL - datafusion_expr::Expr::Literal(ScalarValue::Null) + Expr::Literal(ScalarValue::Null) }) .cast_to(target_field.data_type(), &DFSchema::empty())?, }; Ok(expr.alias(target_field.name())) }) - .collect::>>()?; + .collect::>>()?; let source = project(source, exprs)?; let op = if overwrite { diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 7cbe34825c503..06b4d4a710a31 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -93,6 +93,8 @@ impl QueryBuilder { fetch: self.fetch.clone(), locks: self.locks.clone(), for_clause: self.for_clause.clone(), + settings: None, + format_clause: None, }) } fn create_empty() -> Self { @@ -234,6 +236,7 @@ impl SelectBuilder { value_table_mode: self.value_table_mode, connect_by: None, window_before_qualify: false, + prewhere: None, }) } fn create_empty() -> Self { @@ -245,7 +248,7 @@ impl SelectBuilder { from: Default::default(), lateral_views: Default::default(), selection: Default::default(), - group_by: Some(ast::GroupByExpr::Expressions(Vec::new())), + group_by: Some(ast::GroupByExpr::Expressions(Vec::new(), Vec::new())), cluster_by: Default::default(), distribute_by: Default::default(), sort_by: Default::default(), diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 6b7775ee3d4db..e6b67b5d9fb2d 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -176,6 +176,7 @@ impl Unparser<'_> { null_treatment: None, over: None, within_group: vec![], + parameters: ast::FunctionArguments::None, })) } Expr::Between(Between { @@ -306,6 +307,7 @@ impl Unparser<'_> { null_treatment: None, over, within_group: vec![], + parameters: ast::FunctionArguments::None, })) } Expr::SimilarTo(Like { @@ -351,6 +353,7 @@ impl Unparser<'_> { null_treatment: None, over: None, within_group: vec![], + parameters: ast::FunctionArguments::None, })) } Expr::ScalarSubquery(subq) => { diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 41a8c968841b3..7a653f80be08b 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -172,6 +172,7 @@ impl Unparser<'_> { .iter() .map(|expr| self.expr_to_sql(expr)) .collect::>>()?, + vec![], )); } Some(AggVariant::Window(window)) => { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index aca0d040bb8da..e34e7e20a0f32 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3627,7 +3627,7 @@ fn test_prepare_statement_to_plan_panic_prepare_wrong_syntax() { let sql = "PREPARE AS SELECT id, age FROM person WHERE age = $foo"; assert_eq!( logical_plan(sql).unwrap_err().strip_backtrace(), - "SQL error: ParserError(\"Expected AS, found: SELECT\")" + "SQL error: ParserError(\"Expected: AS, found: SELECT\")" ) } @@ -3668,7 +3668,7 @@ fn test_non_prepare_statement_should_infer_types() { #[test] #[should_panic( - expected = "value: SQL(ParserError(\"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: $1\"" + expected = "value: SQL(ParserError(\"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: $1\"" )] fn test_prepare_statement_to_plan_panic_is_param() { let sql = "PREPARE my_plan(INT) AS SELECT id, age FROM person WHERE age is $1"; @@ -4347,7 
+4347,7 @@ fn test_parse_escaped_string_literal_value() { let sql = r"SELECT character_length(E'\000') AS len"; assert_eq!( logical_plan(sql).unwrap_err().strip_backtrace(), - "SQL error: TokenizerError(\"Unterminated encoded string literal at Line: 1, Column 25\")" + "SQL error: TokenizerError(\"Unterminated encoded string literal at Line: 1, Column: 25\")" ) } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 560328ee8619a..8c8ed2e587439 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -18,8 +18,6 @@ use std::ffi::OsStr; use std::fs; use std::path::{Path, PathBuf}; -#[cfg(target_family = "windows")] -use std::thread; use clap::Parser; use datafusion_sqllogictest::{DataFusion, TestContext}; @@ -32,29 +30,15 @@ use datafusion_common_runtime::SpawnedTask; const TEST_DIRECTORY: &str = "test_files/"; const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_"; +const STACK_SIZE: usize = 2 * 1024 * 1024 + 512 * 1024; // 2.5 MBs, the default 2 MBs is currently too small -#[cfg(target_family = "windows")] -pub fn main() { - // Tests from `tpch/tpch.slt` fail with stackoverflow with the default stack size. - thread::Builder::new() - .stack_size(2 * 1024 * 1024) // 2 MB - .spawn(move || { - tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .unwrap() - .block_on(async { run_tests().await }) - .unwrap() - }) +pub fn main() -> Result<()> { + tokio::runtime::Builder::new_multi_thread() + .thread_stack_size(STACK_SIZE) + .enable_all() + .build() .unwrap() - .join() - .unwrap(); -} - -#[tokio::main] -#[cfg(not(target_family = "windows"))] -pub async fn main() -> Result<()> { - run_tests().await + .block_on(run_tests()) } /// Sets up an empty directory at test_files/scratch/ diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 6fafc0a74110c..a0140b1c5292a 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3643,7 +3643,7 @@ create table bool_aggregate_functions ( c5 boolean, c6 boolean, c7 boolean, - c8 boolean, + c8 boolean ) as values (true, true, false, false, true, true, null, null), diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index ab4ff9e2ce926..448706744305a 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -430,5 +430,5 @@ select arrow_cast('MyAwesomeString', 'Utf8View'), arrow_typeof(arrow_cast('MyAwe MyAwesomeString Utf8View # Fails until we update arrow-rs with support for https://github.com/apache/arrow-rs/pull/5894 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: arrow_cast"\) +query error DataFusion error: SQL error: ParserError\("Expected: an SQL statement, found: arrow_cast"\) arrow_cast('MyAwesomeString', 'BinaryView'), arrow_typeof(arrow_cast('MyAwesomeString', 'BinaryView')) diff --git a/datafusion/sqllogictest/test_files/coalesce.slt b/datafusion/sqllogictest/test_files/coalesce.slt index 17b0e774d9cb7..d16b79734c62c 100644 --- a/datafusion/sqllogictest/test_files/coalesce.slt +++ b/datafusion/sqllogictest/test_files/coalesce.slt @@ -361,7 +361,7 @@ drop table test statement ok CREATE TABLE test( c1 BIGINT, - c2 BIGINT, + c2 BIGINT ) as VALUES (1, 2), (NULL, 2), diff --git a/datafusion/sqllogictest/test_files/copy.slt 
b/datafusion/sqllogictest/test_files/copy.slt index 21c34bc25cee0..6a6ab15a065d3 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -600,7 +600,7 @@ query error DataFusion error: Invalid or Unsupported Configuration: Config value COPY source_table to 'test_files/scratch/copy/table.json' STORED AS JSON OPTIONS ('format.row_group_size' 55); # Incomplete statement -query error DataFusion error: SQL error: ParserError\("Expected \), found: EOF"\) +query error DataFusion error: SQL error: ParserError\("Expected: \), found: EOF"\) COPY (select col2, sum(col1) from source_table # Copy from table with non literal @@ -609,4 +609,4 @@ COPY source_table to '/tmp/table.parquet' (row_group_size 55 + 102); # Copy using execution.keep_partition_by_columns with an invalid value query error DataFusion error: Invalid or Unsupported Configuration: provided value for 'execution.keep_partition_by_columns' was not recognized: "invalid_value" -COPY source_table to '/tmp/table.parquet' OPTIONS (execution.keep_partition_by_columns invalid_value); \ No newline at end of file +COPY source_table to '/tmp/table.parquet' OPTIONS (execution.keep_partition_by_columns invalid_value); diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 607c909fd63d5..e42d14e101f17 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -33,23 +33,23 @@ statement error DataFusion error: SQL error: ParserError\("Missing LOCATION clau CREATE EXTERNAL TABLE t STORED AS CSV # Option value is missing -statement error DataFusion error: SQL error: ParserError\("Expected string or numeric value, found: \)"\) +statement error DataFusion error: SQL error: ParserError\("Expected: string or numeric value, found: \)"\) CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2, k3) LOCATION 'blahblah' # Missing `(` in WITH ORDER clause -statement error DataFusion error: SQL error: ParserError\("Expected \(, found: c1"\) +statement error DataFusion error: SQL error: ParserError\("Expected: \(, found: c1"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER c1 LOCATION 'foo.csv' # Missing `)` in WITH ORDER clause -statement error DataFusion error: SQL error: ParserError\("Expected \), found: LOCATION"\) +statement error DataFusion error: SQL error: ParserError\("Expected: \), found: LOCATION"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 LOCATION 'foo.csv' # Missing `ROW` in WITH HEADER clause -statement error DataFusion error: SQL error: ParserError\("Expected ROW, found: LOCATION"\) +statement error DataFusion error: SQL error: ParserError\("Expected: ROW, found: LOCATION"\) CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc' # Missing `BY` in PARTITIONED clause -statement error DataFusion error: SQL error: ParserError\("Expected BY, found: LOCATION"\) +statement error DataFusion error: SQL error: ParserError\("Expected: BY, found: LOCATION"\) CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc' # Duplicate `STORED AS` clause @@ -69,11 +69,11 @@ statement error DataFusion error: SQL error: ParserError\("OPTIONS specified mor CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS ('k1' 'v1', 'k2' 'v2') OPTIONS ('k3' 'v3') LOCATION 'foo.csv' # With typo error -statement error DataFusion error: SQL error: ParserError\("Expected HEADER, found: HEAD"\) +statement error DataFusion 
error: SQL error: ParserError\("Expected: HEADER, found: HEAD"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH HEAD ROW LOCATION 'foo.csv'; # Missing `anything` in WITH clause -statement error DataFusion error: SQL error: ParserError\("Expected HEADER, found: LOCATION"\) +statement error DataFusion error: SQL error: ParserError\("Expected: HEADER, found: LOCATION"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH LOCATION 'foo.csv'; # Unrecognized random clause diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index a8a689cbb8b5e..ca3bebe79f279 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -167,7 +167,7 @@ physical_plan statement ok CREATE TABLE table_with_necessary_quoting ( int_col INT, - string_col TEXT, + string_col TEXT ) AS VALUES (1, 'e|e|e'), (2, 'f|f|f'), diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index 626af88aa9b8c..7a6ac5ca7121a 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -20,7 +20,7 @@ CREATE TABLE test( num INT, bin_field BYTEA, base64_field TEXT, - hex_field TEXT, + hex_field TEXT ) as VALUES (0, 'abc', encode('abc', 'base64'), encode('abc', 'hex')), (1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')), diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 4e8f3b59a650a..b08d329d4a863 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -2356,7 +2356,7 @@ CREATE TABLE t_source( column1 String, column2 String, column3 String, - column4 String, + column4 String ) AS VALUES ('one', 'one', 'one', 'one'), ('two', 'two', '', 'two'), diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 04a1fcc78fe7a..b2be65a609e37 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4489,7 +4489,7 @@ LIMIT 5 statement ok CREATE TABLE src_table ( t1 TIMESTAMP, - c2 INT, + c2 INT ) AS VALUES ('2020-12-10T00:00:00.00Z', 0), ('2020-12-11T00:00:00.00Z', 1), diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index df66bffab8e82..b9897f81a107a 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3844,7 +3844,7 @@ EXPLAIN SELECT * FROM ( ---- logical_plan EmptyRelation -# Left ANTI join with empty right table +# Left ANTI join with empty right table query TT EXPLAIN SELECT * FROM ( SELECT 1 as a @@ -3855,7 +3855,7 @@ logical_plan 02)--Projection: Int64(1) AS a 03)----EmptyRelation -# Right ANTI join with empty left table +# Right ANTI join with empty left table query TT EXPLAIN SELECT * FROM ( SELECT 1 as a WHERE 1=0 @@ -4043,4 +4043,3 @@ physical_plan 03)----MemoryExec: partitions=1, partition_sizes=[1] 04)----SortExec: expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] 05)------MemoryExec: partitions=1, partition_sizes=[1] - diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 573441ab44013..6ff804c3065d9 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -280,7 +280,7 @@ CREATE TABLE test_non_nullable_integer( c5 TINYINT UNSIGNED NOT NULL, 
c6 SMALLINT UNSIGNED NOT NULL, c7 INT UNSIGNED NOT NULL, - c8 BIGINT UNSIGNED NOT NULL, + c8 BIGINT UNSIGNED NOT NULL ); query I @@ -348,7 +348,7 @@ drop table test_non_nullable_integer statement ok CREATE TABLE test_nullable_float( c1 float, - c2 double, + c2 double ) AS VALUES (-1.0, -1.0), (1.0, 1.0), @@ -415,7 +415,7 @@ drop table test_nullable_float statement ok CREATE TABLE test_non_nullable_float( c1 float NOT NULL, - c2 double NOT NULL, + c2 double NOT NULL ); query I diff --git a/datafusion/sqllogictest/test_files/misc.slt b/datafusion/sqllogictest/test_files/misc.slt index 66606df834808..9f4710eb9bcc0 100644 --- a/datafusion/sqllogictest/test_files/misc.slt +++ b/datafusion/sqllogictest/test_files/misc.slt @@ -37,4 +37,4 @@ select 1 where NULL and 1 = 1 query I select 1 where NULL or 1 = 1 ---- -1 \ No newline at end of file +1 diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt index ffaae7204ecaf..4695e37aa560f 100644 --- a/datafusion/sqllogictest/test_files/predicates.slt +++ b/datafusion/sqllogictest/test_files/predicates.slt @@ -584,7 +584,7 @@ DROP TABLE data_index_bloom_encoding_stats; # String coercion ######## -statement error DataFusion error: SQL error: ParserError\("Expected a data type name, found: ,"\) +statement error DataFusion error: SQL error: ParserError\("Expected: a data type name, found: ,"\) CREATE TABLE t(vendor_id_utf8, vendor_id_dict) AS VALUES (arrow_cast('124', 'Utf8'), arrow_cast('124', 'Dictionary(Int16, Utf8)')), @@ -692,7 +692,7 @@ CREATE TABLE IF NOT EXISTS partsupp ( ps_suppkey BIGINT, ps_availqty INTEGER, ps_supplycost DECIMAL(15, 2), - ps_comment VARCHAR, + ps_comment VARCHAR ) AS VALUES (63700, 7311, 100, 993.49, 'ven ideas. quickly even packages print. 
pending multipliers must have to are fluff'); diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 85ac5b0c242db..5daa9333fb36f 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1578,7 +1578,7 @@ false statement ok CREATE TABLE t1( a boolean, - b boolean, + b boolean ) as VALUES (true, true), (true, null), diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 95f67245a981e..03426dec874f3 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -101,7 +101,7 @@ statement ok CREATE TABLE test ( c1 BIGINT NOT NULL, c2 BIGINT NOT NULL, - c3 BOOLEAN NOT NULL, + c3 BOOLEAN NOT NULL ) AS VALUES (0, 1, false), (0, 10, true), (0, 2, true), @@ -336,13 +336,13 @@ three 1 NULL 1 # select_values_list -statement error DataFusion error: SQL error: ParserError\("Expected \(, found: EOF"\) +statement error DataFusion error: SQL error: ParserError\("Expected: \(, found: EOF"\) VALUES -statement error DataFusion error: SQL error: ParserError\("Expected an expression:, found: \)"\) +statement error DataFusion error: SQL error: ParserError\("Expected: an expression:, found: \)"\) VALUES () -statement error DataFusion error: SQL error: ParserError\("Expected an expression:, found: \)"\) +statement error DataFusion error: SQL error: ParserError\("Expected: an expression:, found: \)"\) VALUES (1),() statement error DataFusion error: Error during planning: Inconsistent data length across values list: got 2 values in row 1 but expected 1 diff --git a/datafusion/sqllogictest/test_files/strings.slt b/datafusion/sqllogictest/test_files/strings.slt index 3cd6c339b44fb..30fb2d750d95e 100644 --- a/datafusion/sqllogictest/test_files/strings.slt +++ b/datafusion/sqllogictest/test_files/strings.slt @@ -17,7 +17,7 @@ statement ok CREATE TABLE test( - s TEXT, + s TEXT ) as VALUES ('p1'), ('p1e1'), diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index fd6e25ea749df..a7384fd4d8ad6 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -24,7 +24,7 @@ CREATE TABLE values( a INT, b FLOAT, c VARCHAR, - n VARCHAR, + n VARCHAR ) AS VALUES (1, 1.1, 'a', NULL), (2, 2.2, 'b', NULL), diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 5ede68a42aae6..31b16f975e9ea 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -22,7 +22,7 @@ statement ok CREATE TABLE t1( id INT, - name TEXT, + name TEXT ) as VALUES (1, 'Alex'), (2, 'Bob'), @@ -32,7 +32,7 @@ CREATE TABLE t1( statement ok CREATE TABLE t2( id TINYINT, - name TEXT, + name TEXT ) as VALUES (1, 'Alex'), (2, 'Bob'), diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 06733f7b1e40e..698faf87c9b20 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -267,7 +267,7 @@ query error DataFusion error: Error during planning: unnest\(\) requires exactly select unnest(); ## Unnest empty expression in from clause -query error DataFusion error: SQL error: ParserError\("Expected an expression:, found: \)"\) +query error DataFusion error: SQL error: ParserError\("Expected: an expression:, found: \)"\) select * from 
unnest(); @@ -496,7 +496,7 @@ select unnest(column1) from (select * from (values([1,2,3]), ([4,5,6])) limit 1 5 6 -## FIXME: https://github.com/apache/datafusion/issues/11198 +## FIXME: https://github.com/apache/datafusion/issues/11198 query error DataFusion error: Error during planning: Projections require unique expression names but the expression "UNNEST\(Column\(Column \{ relation: Some\(Bare \{ table: "unnest_table" \}\), name: "column1" \}\)\)" at position 0 and "UNNEST\(Column\(Column \{ relation: Some\(Bare \{ table: "unnest_table" \}\), name: "column1" \}\)\)" at position 1 have the same name. Consider aliasing \("AS"\) one of them. select unnest(column1), unnest(column1) from unnest_table; @@ -556,4 +556,4 @@ physical_plan 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------UnnestExec 07)------------ProjectionExec: expr=[column3@0 as unnest(recursive_unnest_table.column3), column3@0 as column3] -08)--------------MemoryExec: partitions=1, partition_sizes=[1] \ No newline at end of file +08)--------------MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index a865a7ccbd8fb..5296f13de08a5 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3415,7 +3415,7 @@ SELECT # window1 spec is defined multiple times statement error DataFusion error: Error during planning: The window window1 is defined multiple times! SELECT - MAX(c12) OVER window1 as min1, + MAX(c12) OVER window1 as min1 FROM aggregate_test_100 WINDOW window1 AS (ORDER BY C12), window1 AS (ORDER BY C3) From 81aff944bd76b674a22371f7deaa12560d2f629d Mon Sep 17 00:00:00 2001 From: Arttu Date: Tue, 16 Jul 2024 22:54:50 +0200 Subject: [PATCH 067/357] feat: support UDWFs in Substrait (#11489) * feat: support UDWFs in Substrait Previously Substrait consumer would, for window functions, look at: 1. UDAFs 2. built-in window functions 3. built-in aggregate functions That makes it tough to override the built-in window function behavior, as it could only be overridden with a UDAF but some window functions don't fit nicely into aggregates. This change adds UDWFs at the top, so the consumer will look at: 1. UDWFs 2. UDAFs 3. built-in window functions 4. built-in aggregate functions This also paves the way for moving DF's built-in window funcs into UDWFs. 
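For concreteness, the new resolution order reads roughly like the snippet below, which is lifted from the consumer.rs hunk later in this patch (see the diff that follows); names such as ctx, fn_name and window.function_reference come from that surrounding function:

    // UDWF first, then UDAF, then built-in window/aggregate functions.
    let fun = if let Ok(udwf) = ctx.udwf(fn_name) {
        Ok(WindowFunctionDefinition::WindowUDF(udwf))
    } else if let Ok(udaf) = ctx.udaf(fn_name) {
        Ok(WindowFunctionDefinition::AggregateUDF(udaf))
    } else if let Some(fun) = find_df_window_func(fn_name) {
        Ok(fun)
    } else {
        not_impl_err!(
            "Window function {} is not supported: function anchor = {:?}",
            fn_name,
            window.function_reference
        )
    }?;
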
* check udwf first, then udaf --- .../substrait/src/logical_plan/consumer.rs | 27 +++++++------- .../tests/cases/roundtrip_logical_plan.rs | 36 ++++++++++++++++++- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 991aa61fbf159..1365630d5079a 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -23,8 +23,8 @@ use datafusion::arrow::datatypes::{ }; use datafusion::common::plan_err; use datafusion::common::{ - not_impl_datafusion_err, not_impl_err, plan_datafusion_err, substrait_datafusion_err, - substrait_err, DFSchema, DFSchemaRef, + not_impl_err, plan_datafusion_err, substrait_datafusion_err, substrait_err, DFSchema, + DFSchemaRef, }; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::expr::{Exists, InSubquery, Sort}; @@ -1182,16 +1182,19 @@ pub async fn from_substrait_rex( }; let fn_name = substrait_fun_name(fn_name); - // check udaf first, then built-in functions - let fun = match ctx.udaf(fn_name) { - Ok(udaf) => Ok(WindowFunctionDefinition::AggregateUDF(udaf)), - Err(_) => find_df_window_func(fn_name).ok_or_else(|| { - not_impl_datafusion_err!( - "Window function {} is not supported: function anchor = {:?}", - fn_name, - window.function_reference - ) - }), + // check udwf first, then udaf, then built-in window and aggregate functions + let fun = if let Ok(udwf) = ctx.udwf(fn_name) { + Ok(WindowFunctionDefinition::WindowUDF(udwf)) + } else if let Ok(udaf) = ctx.udaf(fn_name) { + Ok(WindowFunctionDefinition::AggregateUDF(udaf)) + } else if let Some(fun) = find_df_window_func(fn_name) { + Ok(fun) + } else { + not_impl_err!( + "Window function {} is not supported: function anchor = {:?}", + fn_name, + window.function_reference + ) }?; let order_by = diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 5b2d0fbacaef0..a7653e11d598f 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -31,7 +31,8 @@ use datafusion::error::Result; use datafusion::execution::registry::SerializerRegistry; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::logical_expr::{ - Extension, LogicalPlan, Repartition, UserDefinedLogicalNode, Volatility, + Extension, LogicalPlan, PartitionEvaluator, Repartition, UserDefinedLogicalNode, + Volatility, }; use datafusion::optimizer::simplify_expressions::expr_simplifier::THRESHOLD_INLINE_INLIST; use datafusion::prelude::*; @@ -860,6 +861,39 @@ async fn roundtrip_aggregate_udf() -> Result<()> { roundtrip_with_ctx("select dummy_agg(a) from data", ctx).await } +#[tokio::test] +async fn roundtrip_window_udf() -> Result<()> { + #[derive(Debug)] + struct Dummy {} + + impl PartitionEvaluator for Dummy { + fn evaluate_all( + &mut self, + values: &[ArrayRef], + _num_rows: usize, + ) -> Result { + Ok(values[0].to_owned()) + } + } + + fn make_partition_evaluator() -> Result> { + Ok(Box::new(Dummy {})) + } + + let dummy_agg = create_udwf( + "dummy_window", // name + DataType::Int64, // input type + Arc::new(DataType::Int64), // return type + Volatility::Immutable, + Arc::new(make_partition_evaluator), + ); + + let ctx = create_context().await?; + ctx.register_udwf(dummy_agg); + + roundtrip_with_ctx("select dummy_window(a) OVER () from data", ctx).await +} + #[tokio::test] async fn 
roundtrip_repartition_roundrobin() -> Result<()> { let ctx = create_context().await?; From 02326998f07a13fda0c93988bf13853413c4a2b2 Mon Sep 17 00:00:00 2001 From: Georgi Krastev Date: Wed, 17 Jul 2024 00:52:20 +0300 Subject: [PATCH 068/357] Add extension hooks for encoding and decoding UDAFs and UDWFs (#11417) * Add extension hooks for encoding and decoding UDAFs and UDWFs * Add tests for encoding and decoding UDAF --- .../examples/composed_extension_codec.rs | 80 +++--- .../physical-expr-common/src/aggregate/mod.rs | 5 + datafusion/proto/proto/datafusion.proto | 35 +-- datafusion/proto/src/generated/pbjson.rs | 102 +++++++ datafusion/proto/src/generated/prost.rs | 10 + .../proto/src/logical_plan/file_formats.rs | 80 ------ .../proto/src/logical_plan/from_proto.rs | 42 +-- datafusion/proto/src/logical_plan/mod.rs | 22 +- datafusion/proto/src/logical_plan/to_proto.rs | 84 +++--- .../proto/src/physical_plan/from_proto.rs | 6 +- datafusion/proto/src/physical_plan/mod.rs | 23 +- .../proto/src/physical_plan/to_proto.rs | 122 +++++---- datafusion/proto/tests/cases/mod.rs | 99 +++++++ .../tests/cases/roundtrip_logical_plan.rs | 171 +++++------- .../tests/cases/roundtrip_physical_plan.rs | 251 +++++++++++------- 15 files changed, 686 insertions(+), 446 deletions(-) diff --git a/datafusion-examples/examples/composed_extension_codec.rs b/datafusion-examples/examples/composed_extension_codec.rs index 43c6daba211ac..5c34eccf26e11 100644 --- a/datafusion-examples/examples/composed_extension_codec.rs +++ b/datafusion-examples/examples/composed_extension_codec.rs @@ -30,18 +30,19 @@ //! DeltaScan //! ``` +use std::any::Any; +use std::fmt::Debug; +use std::ops::Deref; +use std::sync::Arc; + use datafusion::common::Result; use datafusion::physical_plan::{DisplayAs, ExecutionPlan}; use datafusion::prelude::SessionContext; -use datafusion_common::internal_err; +use datafusion_common::{internal_err, DataFusionError}; use datafusion_expr::registry::FunctionRegistry; -use datafusion_expr::ScalarUDF; +use datafusion_expr::{AggregateUDF, ScalarUDF}; use datafusion_proto::physical_plan::{AsExecutionPlan, PhysicalExtensionCodec}; use datafusion_proto::protobuf; -use std::any::Any; -use std::fmt::Debug; -use std::ops::Deref; -use std::sync::Arc; #[tokio::main] async fn main() { @@ -239,6 +240,25 @@ struct ComposedPhysicalExtensionCodec { codecs: Vec>, } +impl ComposedPhysicalExtensionCodec { + fn try_any( + &self, + mut f: impl FnMut(&dyn PhysicalExtensionCodec) -> Result, + ) -> Result { + let mut last_err = None; + for codec in &self.codecs { + match f(codec.as_ref()) { + Ok(node) => return Ok(node), + Err(err) => last_err = Some(err), + } + } + + Err(last_err.unwrap_or_else(|| { + DataFusionError::NotImplemented("Empty list of composed codecs".to_owned()) + })) + } +} + impl PhysicalExtensionCodec for ComposedPhysicalExtensionCodec { fn try_decode( &self, @@ -246,46 +266,26 @@ impl PhysicalExtensionCodec for ComposedPhysicalExtensionCodec { inputs: &[Arc], registry: &dyn FunctionRegistry, ) -> Result> { - let mut last_err = None; - for codec in &self.codecs { - match codec.try_decode(buf, inputs, registry) { - Ok(plan) => return Ok(plan), - Err(e) => last_err = Some(e), - } - } - Err(last_err.unwrap()) + self.try_any(|codec| codec.try_decode(buf, inputs, registry)) } fn try_encode(&self, node: Arc, buf: &mut Vec) -> Result<()> { - let mut last_err = None; - for codec in &self.codecs { - match codec.try_encode(node.clone(), buf) { - Ok(_) => return Ok(()), - Err(e) => last_err = Some(e), - } - } - 
Err(last_err.unwrap()) + self.try_any(|codec| codec.try_encode(node.clone(), buf)) } - fn try_decode_udf(&self, name: &str, _buf: &[u8]) -> Result> { - let mut last_err = None; - for codec in &self.codecs { - match codec.try_decode_udf(name, _buf) { - Ok(plan) => return Ok(plan), - Err(e) => last_err = Some(e), - } - } - Err(last_err.unwrap()) + fn try_decode_udf(&self, name: &str, buf: &[u8]) -> Result> { + self.try_any(|codec| codec.try_decode_udf(name, buf)) } - fn try_encode_udf(&self, _node: &ScalarUDF, _buf: &mut Vec) -> Result<()> { - let mut last_err = None; - for codec in &self.codecs { - match codec.try_encode_udf(_node, _buf) { - Ok(_) => return Ok(()), - Err(e) => last_err = Some(e), - } - } - Err(last_err.unwrap()) + fn try_encode_udf(&self, node: &ScalarUDF, buf: &mut Vec) -> Result<()> { + self.try_any(|codec| codec.try_encode_udf(node, buf)) + } + + fn try_decode_udaf(&self, name: &str, buf: &[u8]) -> Result> { + self.try_any(|codec| codec.try_decode_udaf(name, buf)) + } + + fn try_encode_udaf(&self, node: &AggregateUDF, buf: &mut Vec) -> Result<()> { + self.try_any(|codec| codec.try_encode_udaf(node, buf)) } } diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index db4581a622acc..0e245fd0a66aa 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -283,6 +283,11 @@ impl AggregateFunctionExpr { pub fn is_distinct(&self) -> bool { self.is_distinct } + + /// Return if the aggregation ignores nulls + pub fn ignore_nulls(&self) -> bool { + self.ignore_nulls + } } impl AggregateExpr for AggregateFunctionExpr { diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 9ef884531e320..dc551778c5fb2 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -164,7 +164,7 @@ message CreateExternalTableNode { map options = 8; datafusion_common.Constraints constraints = 12; map column_defaults = 13; - } +} message PrepareNode { string name = 1; @@ -249,24 +249,24 @@ message DistinctOnNode { } message CopyToNode { - LogicalPlanNode input = 1; - string output_url = 2; - bytes file_type = 3; - repeated string partition_by = 7; + LogicalPlanNode input = 1; + string output_url = 2; + bytes file_type = 3; + repeated string partition_by = 7; } message UnnestNode { - LogicalPlanNode input = 1; - repeated datafusion_common.Column exec_columns = 2; - repeated uint64 list_type_columns = 3; - repeated uint64 struct_type_columns = 4; - repeated uint64 dependency_indices = 5; - datafusion_common.DfSchema schema = 6; - UnnestOptions options = 7; + LogicalPlanNode input = 1; + repeated datafusion_common.Column exec_columns = 2; + repeated uint64 list_type_columns = 3; + repeated uint64 struct_type_columns = 4; + repeated uint64 dependency_indices = 5; + datafusion_common.DfSchema schema = 6; + UnnestOptions options = 7; } message UnnestOptions { - bool preserve_nulls = 1; + bool preserve_nulls = 1; } message UnionNode { @@ -488,8 +488,8 @@ enum AggregateFunction { // BIT_AND = 19; // BIT_OR = 20; // BIT_XOR = 21; -// BOOL_AND = 22; -// BOOL_OR = 23; + // BOOL_AND = 22; + // BOOL_OR = 23; // REGR_SLOPE = 26; // REGR_INTERCEPT = 27; // REGR_COUNT = 28; @@ -517,6 +517,7 @@ message AggregateUDFExprNode { bool distinct = 5; LogicalExprNode filter = 3; repeated LogicalExprNode order_by = 4; + optional bytes fun_definition = 6; } message ScalarUDFExprNode { @@ -551,6 +552,7 @@ 
message WindowExprNode { repeated LogicalExprNode order_by = 6; // repeated LogicalExprNode filter = 7; WindowFrame window_frame = 8; + optional bytes fun_definition = 10; } message BetweenNode { @@ -856,6 +858,8 @@ message PhysicalAggregateExprNode { repeated PhysicalExprNode expr = 2; repeated PhysicalSortExprNode ordering_req = 5; bool distinct = 3; + bool ignore_nulls = 6; + optional bytes fun_definition = 7; } message PhysicalWindowExprNode { @@ -869,6 +873,7 @@ message PhysicalWindowExprNode { repeated PhysicalSortExprNode order_by = 6; WindowFrame window_frame = 7; string name = 8; + optional bytes fun_definition = 9; } message PhysicalIsNull { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index fa989480fad90..8f77c24bd9117 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -829,6 +829,9 @@ impl serde::Serialize for AggregateUdfExprNode { if !self.order_by.is_empty() { len += 1; } + if self.fun_definition.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.AggregateUDFExprNode", len)?; if !self.fun_name.is_empty() { struct_ser.serialize_field("funName", &self.fun_name)?; @@ -845,6 +848,10 @@ impl serde::Serialize for AggregateUdfExprNode { if !self.order_by.is_empty() { struct_ser.serialize_field("orderBy", &self.order_by)?; } + if let Some(v) = self.fun_definition.as_ref() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; + } struct_ser.end() } } @@ -862,6 +869,8 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { "filter", "order_by", "orderBy", + "fun_definition", + "funDefinition", ]; #[allow(clippy::enum_variant_names)] @@ -871,6 +880,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { Distinct, Filter, OrderBy, + FunDefinition, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -897,6 +907,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { "distinct" => Ok(GeneratedField::Distinct), "filter" => Ok(GeneratedField::Filter), "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), + "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -921,6 +932,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { let mut distinct__ = None; let mut filter__ = None; let mut order_by__ = None; + let mut fun_definition__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::FunName => { @@ -953,6 +965,14 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { } order_by__ = Some(map_.next_value()?); } + GeneratedField::FunDefinition => { + if fun_definition__.is_some() { + return Err(serde::de::Error::duplicate_field("funDefinition")); + } + fun_definition__ = + map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) + ; + } } } Ok(AggregateUdfExprNode { @@ -961,6 +981,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { distinct: distinct__.unwrap_or_default(), filter: filter__, order_by: order_by__.unwrap_or_default(), + fun_definition: fun_definition__, }) } } @@ -12631,6 +12652,12 @@ impl serde::Serialize for PhysicalAggregateExprNode { if self.distinct { len += 1; } + if self.ignore_nulls { + len += 1; + } + if self.fun_definition.is_some() { + len += 1; + } if self.aggregate_function.is_some() { len += 1; } @@ -12644,6 +12671,13 @@ impl serde::Serialize for PhysicalAggregateExprNode { if self.distinct { struct_ser.serialize_field("distinct", &self.distinct)?; } + if self.ignore_nulls { + struct_ser.serialize_field("ignoreNulls", &self.ignore_nulls)?; + } + if let Some(v) = self.fun_definition.as_ref() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; + } if let Some(v) = self.aggregate_function.as_ref() { match v { physical_aggregate_expr_node::AggregateFunction::AggrFunction(v) => { @@ -12670,6 +12704,10 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "ordering_req", "orderingReq", "distinct", + "ignore_nulls", + "ignoreNulls", + "fun_definition", + "funDefinition", "aggr_function", "aggrFunction", "user_defined_aggr_function", @@ -12681,6 +12719,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { Expr, OrderingReq, Distinct, + IgnoreNulls, + FunDefinition, AggrFunction, UserDefinedAggrFunction, } @@ -12707,6 +12747,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "expr" => Ok(GeneratedField::Expr), "orderingReq" | "ordering_req" => Ok(GeneratedField::OrderingReq), "distinct" => Ok(GeneratedField::Distinct), + "ignoreNulls" | "ignore_nulls" => Ok(GeneratedField::IgnoreNulls), + "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "userDefinedAggrFunction" | "user_defined_aggr_function" => Ok(GeneratedField::UserDefinedAggrFunction), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -12731,6 +12773,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { let mut expr__ = None; let mut ordering_req__ = None; let mut distinct__ = None; + let mut ignore_nulls__ = None; + let mut fun_definition__ = None; let mut aggregate_function__ = None; while let Some(k) = map_.next_key()? 
{ match k { @@ -12752,6 +12796,20 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { } distinct__ = Some(map_.next_value()?); } + GeneratedField::IgnoreNulls => { + if ignore_nulls__.is_some() { + return Err(serde::de::Error::duplicate_field("ignoreNulls")); + } + ignore_nulls__ = Some(map_.next_value()?); + } + GeneratedField::FunDefinition => { + if fun_definition__.is_some() { + return Err(serde::de::Error::duplicate_field("funDefinition")); + } + fun_definition__ = + map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) + ; + } GeneratedField::AggrFunction => { if aggregate_function__.is_some() { return Err(serde::de::Error::duplicate_field("aggrFunction")); @@ -12770,6 +12828,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { expr: expr__.unwrap_or_default(), ordering_req: ordering_req__.unwrap_or_default(), distinct: distinct__.unwrap_or_default(), + ignore_nulls: ignore_nulls__.unwrap_or_default(), + fun_definition: fun_definition__, aggregate_function: aggregate_function__, }) } @@ -15832,6 +15892,9 @@ impl serde::Serialize for PhysicalWindowExprNode { if !self.name.is_empty() { len += 1; } + if self.fun_definition.is_some() { + len += 1; + } if self.window_function.is_some() { len += 1; } @@ -15851,6 +15914,10 @@ impl serde::Serialize for PhysicalWindowExprNode { if !self.name.is_empty() { struct_ser.serialize_field("name", &self.name)?; } + if let Some(v) = self.fun_definition.as_ref() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; + } if let Some(v) = self.window_function.as_ref() { match v { physical_window_expr_node::WindowFunction::AggrFunction(v) => { @@ -15886,6 +15953,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { "window_frame", "windowFrame", "name", + "fun_definition", + "funDefinition", "aggr_function", "aggrFunction", "built_in_function", @@ -15901,6 +15970,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { OrderBy, WindowFrame, Name, + FunDefinition, AggrFunction, BuiltInFunction, UserDefinedAggrFunction, @@ -15930,6 +16000,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame), "name" => Ok(GeneratedField::Name), + "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "builtInFunction" | "built_in_function" => Ok(GeneratedField::BuiltInFunction), "userDefinedAggrFunction" | "user_defined_aggr_function" => Ok(GeneratedField::UserDefinedAggrFunction), @@ -15957,6 +16028,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { let mut order_by__ = None; let mut window_frame__ = None; let mut name__ = None; + let mut fun_definition__ = None; let mut window_function__ = None; while let Some(k) = map_.next_key()? 
{ match k { @@ -15990,6 +16062,14 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { } name__ = Some(map_.next_value()?); } + GeneratedField::FunDefinition => { + if fun_definition__.is_some() { + return Err(serde::de::Error::duplicate_field("funDefinition")); + } + fun_definition__ = + map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) + ; + } GeneratedField::AggrFunction => { if window_function__.is_some() { return Err(serde::de::Error::duplicate_field("aggrFunction")); @@ -16016,6 +16096,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { order_by: order_by__.unwrap_or_default(), window_frame: window_frame__, name: name__.unwrap_or_default(), + fun_definition: fun_definition__, window_function: window_function__, }) } @@ -20349,6 +20430,9 @@ impl serde::Serialize for WindowExprNode { if self.window_frame.is_some() { len += 1; } + if self.fun_definition.is_some() { + len += 1; + } if self.window_function.is_some() { len += 1; } @@ -20365,6 +20449,10 @@ impl serde::Serialize for WindowExprNode { if let Some(v) = self.window_frame.as_ref() { struct_ser.serialize_field("windowFrame", v)?; } + if let Some(v) = self.fun_definition.as_ref() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; + } if let Some(v) = self.window_function.as_ref() { match v { window_expr_node::WindowFunction::AggrFunction(v) => { @@ -20402,6 +20490,8 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "orderBy", "window_frame", "windowFrame", + "fun_definition", + "funDefinition", "aggr_function", "aggrFunction", "built_in_function", @@ -20416,6 +20506,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { PartitionBy, OrderBy, WindowFrame, + FunDefinition, AggrFunction, BuiltInFunction, Udaf, @@ -20445,6 +20536,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "partitionBy" | "partition_by" => Ok(GeneratedField::PartitionBy), "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame), + "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "builtInFunction" | "built_in_function" => Ok(GeneratedField::BuiltInFunction), "udaf" => Ok(GeneratedField::Udaf), @@ -20472,6 +20564,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { let mut partition_by__ = None; let mut order_by__ = None; let mut window_frame__ = None; + let mut fun_definition__ = None; let mut window_function__ = None; while let Some(k) = map_.next_key()? 
{ match k { @@ -20499,6 +20592,14 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { } window_frame__ = map_.next_value()?; } + GeneratedField::FunDefinition => { + if fun_definition__.is_some() { + return Err(serde::de::Error::duplicate_field("funDefinition")); + } + fun_definition__ = + map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) + ; + } GeneratedField::AggrFunction => { if window_function__.is_some() { return Err(serde::de::Error::duplicate_field("aggrFunction")); @@ -20530,6 +20631,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { partition_by: partition_by__.unwrap_or_default(), order_by: order_by__.unwrap_or_default(), window_frame: window_frame__, + fun_definition: fun_definition__, window_function: window_function__, }) } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 8407e545fe650..605c56fa946a3 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -756,6 +756,8 @@ pub struct AggregateUdfExprNode { pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(message, repeated, tag = "4")] pub order_by: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", optional, tag = "6")] + pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -779,6 +781,8 @@ pub struct WindowExprNode { /// repeated LogicalExprNode filter = 7; #[prost(message, optional, tag = "8")] pub window_frame: ::core::option::Option, + #[prost(bytes = "vec", optional, tag = "10")] + pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, #[prost(oneof = "window_expr_node::WindowFunction", tags = "1, 2, 3, 9")] pub window_function: ::core::option::Option, } @@ -1291,6 +1295,10 @@ pub struct PhysicalAggregateExprNode { pub ordering_req: ::prost::alloc::vec::Vec, #[prost(bool, tag = "3")] pub distinct: bool, + #[prost(bool, tag = "6")] + pub ignore_nulls: bool, + #[prost(bytes = "vec", optional, tag = "7")] + pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, #[prost(oneof = "physical_aggregate_expr_node::AggregateFunction", tags = "1, 4")] pub aggregate_function: ::core::option::Option< physical_aggregate_expr_node::AggregateFunction, @@ -1320,6 +1328,8 @@ pub struct PhysicalWindowExprNode { pub window_frame: ::core::option::Option, #[prost(string, tag = "8")] pub name: ::prost::alloc::string::String, + #[prost(bytes = "vec", optional, tag = "9")] + pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, #[prost(oneof = "physical_window_expr_node::WindowFunction", tags = "1, 2, 3")] pub window_function: ::core::option::Option< physical_window_expr_node::WindowFunction, diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index 106d5639489e7..09e36a650b9fa 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -86,22 +86,6 @@ impl LogicalExtensionCodec for CsvLogicalExtensionCodec { ) -> datafusion_common::Result<()> { Ok(()) } - - fn try_decode_udf( - &self, - name: &str, - __buf: &[u8], - ) -> datafusion_common::Result> { - not_impl_err!("LogicalExtensionCodec is not provided for scalar function {name}") - } - - fn try_encode_udf( - &self, - __node: &datafusion_expr::ScalarUDF, - __buf: &mut Vec, - ) -> datafusion_common::Result<()> { - Ok(()) - } } 
#[derive(Debug)] @@ -162,22 +146,6 @@ impl LogicalExtensionCodec for JsonLogicalExtensionCodec { ) -> datafusion_common::Result<()> { Ok(()) } - - fn try_decode_udf( - &self, - name: &str, - __buf: &[u8], - ) -> datafusion_common::Result> { - not_impl_err!("LogicalExtensionCodec is not provided for scalar function {name}") - } - - fn try_encode_udf( - &self, - __node: &datafusion_expr::ScalarUDF, - __buf: &mut Vec, - ) -> datafusion_common::Result<()> { - Ok(()) - } } #[derive(Debug)] @@ -238,22 +206,6 @@ impl LogicalExtensionCodec for ParquetLogicalExtensionCodec { ) -> datafusion_common::Result<()> { Ok(()) } - - fn try_decode_udf( - &self, - name: &str, - __buf: &[u8], - ) -> datafusion_common::Result> { - not_impl_err!("LogicalExtensionCodec is not provided for scalar function {name}") - } - - fn try_encode_udf( - &self, - __node: &datafusion_expr::ScalarUDF, - __buf: &mut Vec, - ) -> datafusion_common::Result<()> { - Ok(()) - } } #[derive(Debug)] @@ -314,22 +266,6 @@ impl LogicalExtensionCodec for ArrowLogicalExtensionCodec { ) -> datafusion_common::Result<()> { Ok(()) } - - fn try_decode_udf( - &self, - name: &str, - __buf: &[u8], - ) -> datafusion_common::Result> { - not_impl_err!("LogicalExtensionCodec is not provided for scalar function {name}") - } - - fn try_encode_udf( - &self, - __node: &datafusion_expr::ScalarUDF, - __buf: &mut Vec, - ) -> datafusion_common::Result<()> { - Ok(()) - } } #[derive(Debug)] @@ -390,20 +326,4 @@ impl LogicalExtensionCodec for AvroLogicalExtensionCodec { ) -> datafusion_common::Result<()> { Ok(()) } - - fn try_decode_udf( - &self, - name: &str, - __buf: &[u8], - ) -> datafusion_common::Result> { - not_impl_err!("LogicalExtensionCodec is not provided for scalar function {name}") - } - - fn try_encode_udf( - &self, - __node: &datafusion_expr::ScalarUDF, - __buf: &mut Vec, - ) -> datafusion_common::Result<()> { - Ok(()) - } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 095c6a50973a1..b6b556a8ed6b2 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -308,14 +308,17 @@ pub fn parse_expr( let aggr_function = parse_i32_to_aggregate_function(i)?; Ok(Expr::WindowFunction(WindowFunction::new( - datafusion_expr::expr::WindowFunctionDefinition::AggregateFunction( - aggr_function, - ), - vec![parse_required_expr(expr.expr.as_deref(), registry, "expr", codec)?], + expr::WindowFunctionDefinition::AggregateFunction(aggr_function), + vec![parse_required_expr( + expr.expr.as_deref(), + registry, + "expr", + codec, + )?], partition_by, order_by, window_frame, - None + None, ))) } window_expr_node::WindowFunction::BuiltInFunction(i) => { @@ -329,26 +332,28 @@ pub fn parse_expr( .unwrap_or_else(Vec::new); Ok(Expr::WindowFunction(WindowFunction::new( - datafusion_expr::expr::WindowFunctionDefinition::BuiltInWindowFunction( + expr::WindowFunctionDefinition::BuiltInWindowFunction( built_in_function, ), args, partition_by, order_by, window_frame, - null_treatment + null_treatment, ))) } window_expr_node::WindowFunction::Udaf(udaf_name) => { - let udaf_function = registry.udaf(udaf_name)?; + let udaf_function = match &expr.fun_definition { + Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, + None => registry.udaf(udaf_name)?, + }; + let args = parse_optional_expr(expr.expr.as_deref(), registry, codec)? 
.map(|e| vec![e]) .unwrap_or_else(Vec::new); Ok(Expr::WindowFunction(WindowFunction::new( - datafusion_expr::expr::WindowFunctionDefinition::AggregateUDF( - udaf_function, - ), + expr::WindowFunctionDefinition::AggregateUDF(udaf_function), args, partition_by, order_by, @@ -357,15 +362,17 @@ pub fn parse_expr( ))) } window_expr_node::WindowFunction::Udwf(udwf_name) => { - let udwf_function = registry.udwf(udwf_name)?; + let udwf_function = match &expr.fun_definition { + Some(buf) => codec.try_decode_udwf(udwf_name, buf)?, + None => registry.udwf(udwf_name)?, + }; + let args = parse_optional_expr(expr.expr.as_deref(), registry, codec)? .map(|e| vec![e]) .unwrap_or_else(Vec::new); Ok(Expr::WindowFunction(WindowFunction::new( - datafusion_expr::expr::WindowFunctionDefinition::WindowUDF( - udwf_function, - ), + expr::WindowFunctionDefinition::WindowUDF(udwf_function), args, partition_by, order_by, @@ -613,7 +620,10 @@ pub fn parse_expr( ))) } ExprType::AggregateUdfExpr(pb) => { - let agg_fn = registry.udaf(pb.fun_name.as_str())?; + let agg_fn = match &pb.fun_definition { + Some(buf) => codec.try_decode_udaf(&pb.fun_name, buf)?, + None => registry.udaf(&pb.fun_name)?, + }; Ok(Expr::AggregateFunction(expr::AggregateFunction::new_udf( agg_fn, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 664cd7e115557..2a963fb13ccf0 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -51,7 +51,6 @@ use datafusion_common::{ context, internal_datafusion_err, internal_err, not_impl_err, DataFusionError, Result, TableReference, }; -use datafusion_expr::Unnest; use datafusion_expr::{ dml, logical_plan::{ @@ -60,8 +59,9 @@ use datafusion_expr::{ EmptyRelation, Extension, Join, JoinConstraint, Limit, Prepare, Projection, Repartition, Sort, SubqueryAlias, TableScan, Values, Window, }, - DistinctOn, DropView, Expr, LogicalPlan, LogicalPlanBuilder, ScalarUDF, + DistinctOn, DropView, Expr, LogicalPlan, LogicalPlanBuilder, ScalarUDF, WindowUDF, }; +use datafusion_expr::{AggregateUDF, Unnest}; use prost::bytes::BufMut; use prost::Message; @@ -144,6 +144,24 @@ pub trait LogicalExtensionCodec: Debug + Send + Sync { fn try_encode_udf(&self, _node: &ScalarUDF, _buf: &mut Vec) -> Result<()> { Ok(()) } + + fn try_decode_udaf(&self, name: &str, _buf: &[u8]) -> Result> { + not_impl_err!( + "LogicalExtensionCodec is not provided for aggregate function {name}" + ) + } + + fn try_encode_udaf(&self, _node: &AggregateUDF, _buf: &mut Vec) -> Result<()> { + Ok(()) + } + + fn try_decode_udwf(&self, name: &str, _buf: &[u8]) -> Result> { + not_impl_err!("LogicalExtensionCodec is not provided for window function {name}") + } + + fn try_encode_udwf(&self, _node: &WindowUDF, _buf: &mut Vec) -> Result<()> { + Ok(()) + } } #[derive(Debug, Clone)] diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index d8f8ea002b2dd..9607b918eb895 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -319,25 +319,37 @@ pub fn serialize_expr( // TODO: support null treatment in proto null_treatment: _, }) => { - let window_function = match fun { - WindowFunctionDefinition::AggregateFunction(fun) => { + let (window_function, fun_definition) = match fun { + WindowFunctionDefinition::AggregateFunction(fun) => ( protobuf::window_expr_node::WindowFunction::AggrFunction( protobuf::AggregateFunction::from(fun).into(), - ) - } - 
WindowFunctionDefinition::BuiltInWindowFunction(fun) => { + ), + None, + ), + WindowFunctionDefinition::BuiltInWindowFunction(fun) => ( protobuf::window_expr_node::WindowFunction::BuiltInFunction( protobuf::BuiltInWindowFunction::from(fun).into(), - ) - } + ), + None, + ), WindowFunctionDefinition::AggregateUDF(aggr_udf) => { - protobuf::window_expr_node::WindowFunction::Udaf( - aggr_udf.name().to_string(), + let mut buf = Vec::new(); + let _ = codec.try_encode_udaf(aggr_udf, &mut buf); + ( + protobuf::window_expr_node::WindowFunction::Udaf( + aggr_udf.name().to_string(), + ), + (!buf.is_empty()).then_some(buf), ) } WindowFunctionDefinition::WindowUDF(window_udf) => { - protobuf::window_expr_node::WindowFunction::Udwf( - window_udf.name().to_string(), + let mut buf = Vec::new(); + let _ = codec.try_encode_udwf(window_udf, &mut buf); + ( + protobuf::window_expr_node::WindowFunction::Udwf( + window_udf.name().to_string(), + ), + (!buf.is_empty()).then_some(buf), ) } }; @@ -358,6 +370,7 @@ pub fn serialize_expr( partition_by, order_by, window_frame, + fun_definition, }); protobuf::LogicalExprNode { expr_type: Some(ExprType::WindowExpr(window_expr)), @@ -395,23 +408,30 @@ pub fn serialize_expr( expr_type: Some(ExprType::AggregateExpr(Box::new(aggregate_expr))), } } - AggregateFunctionDefinition::UDF(fun) => protobuf::LogicalExprNode { - expr_type: Some(ExprType::AggregateUdfExpr(Box::new( - protobuf::AggregateUdfExprNode { - fun_name: fun.name().to_string(), - args: serialize_exprs(args, codec)?, - distinct: *distinct, - filter: match filter { - Some(e) => Some(Box::new(serialize_expr(e.as_ref(), codec)?)), - None => None, - }, - order_by: match order_by { - Some(e) => serialize_exprs(e, codec)?, - None => vec![], + AggregateFunctionDefinition::UDF(fun) => { + let mut buf = Vec::new(); + let _ = codec.try_encode_udaf(fun, &mut buf); + protobuf::LogicalExprNode { + expr_type: Some(ExprType::AggregateUdfExpr(Box::new( + protobuf::AggregateUdfExprNode { + fun_name: fun.name().to_string(), + args: serialize_exprs(args, codec)?, + distinct: *distinct, + filter: match filter { + Some(e) => { + Some(Box::new(serialize_expr(e.as_ref(), codec)?)) + } + None => None, + }, + order_by: match order_by { + Some(e) => serialize_exprs(e, codec)?, + None => vec![], + }, + fun_definition: (!buf.is_empty()).then_some(buf), }, - }, - ))), - }, + ))), + } + } }, Expr::ScalarVariable(_, _) => { @@ -420,17 +440,13 @@ pub fn serialize_expr( )) } Expr::ScalarFunction(ScalarFunction { func, args }) => { - let args = serialize_exprs(args, codec)?; let mut buf = Vec::new(); - let _ = codec.try_encode_udf(func.as_ref(), &mut buf); - - let fun_definition = if buf.is_empty() { None } else { Some(buf) }; - + let _ = codec.try_encode_udf(func, &mut buf); protobuf::LogicalExprNode { expr_type: Some(ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name: func.name().to_string(), - fun_definition, - args, + fun_definition: (!buf.is_empty()).then_some(buf), + args: serialize_exprs(args, codec)?, })), } } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index b7311c694d4c9..5ecca51478053 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -164,8 +164,10 @@ pub fn parse_physical_window_expr( WindowFunctionDefinition::BuiltInWindowFunction(f.into()) } protobuf::physical_window_expr_node::WindowFunction::UserDefinedAggrFunction(udaf_name) => { - let agg_udf = registry.udaf(udaf_name)?; - 
WindowFunctionDefinition::AggregateUDF(agg_udf) + WindowFunctionDefinition::AggregateUDF(match &proto.fun_definition { + Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, + None => registry.udaf(udaf_name)? + }) } } } else { diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 948a39bfe0be7..1220f42ded836 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -61,7 +61,7 @@ use datafusion::physical_plan::{ udaf, AggregateExpr, ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; -use datafusion_expr::ScalarUDF; +use datafusion_expr::{AggregateUDF, ScalarUDF}; use crate::common::{byte_to_string, str_to_byte}; use crate::convert_required; @@ -491,19 +491,22 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &ordering_req, &physical_schema, name.to_string(), - false, + agg_node.ignore_nulls, ) } AggregateFunction::UserDefinedAggrFunction(udaf_name) => { - let agg_udf = registry.udaf(udaf_name)?; + let agg_udf = match &agg_node.fun_definition { + Some(buf) => extension_codec.try_decode_udaf(udaf_name, buf)?, + None => registry.udaf(udaf_name)? + }; + // TODO: 'logical_exprs' is not supported for UDAF yet. // approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. let logical_exprs = &[]; // TODO: `order by` is not supported for UDAF yet let sort_exprs = &[]; let ordering_req = &[]; - let ignore_nulls = false; - udaf::create_aggregate_expr(agg_udf.as_ref(), &input_phy_expr, logical_exprs, sort_exprs, ordering_req, &physical_schema, name, ignore_nulls, false) + udaf::create_aggregate_expr(agg_udf.as_ref(), &input_phy_expr, logical_exprs, sort_exprs, ordering_req, &physical_schema, name, agg_node.ignore_nulls, agg_node.distinct) } } }).transpose()?.ok_or_else(|| { @@ -2034,6 +2037,16 @@ pub trait PhysicalExtensionCodec: Debug + Send + Sync { ) -> Result<()> { not_impl_err!("PhysicalExtensionCodec is not provided") } + + fn try_decode_udaf(&self, name: &str, _buf: &[u8]) -> Result> { + not_impl_err!( + "PhysicalExtensionCodec is not provided for aggregate function {name}" + ) + } + + fn try_encode_udaf(&self, _node: &AggregateUDF, _buf: &mut Vec) -> Result<()> { + Ok(()) + } } #[derive(Debug)] diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index d8d0291e1ca52..7ea2902cf3c09 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -40,6 +40,7 @@ use datafusion::{ physical_plan::expressions::LikeExpr, }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; +use datafusion_expr::WindowFrame; use crate::protobuf::{ self, physical_aggregate_expr_node, physical_window_expr_node, PhysicalSortExprNode, @@ -58,13 +59,17 @@ pub fn serialize_physical_aggr_expr( if let Some(a) = aggr_expr.as_any().downcast_ref::() { let name = a.fun().name().to_string(); + let mut buf = Vec::new(); + codec.try_encode_udaf(a.fun(), &mut buf)?; return Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( protobuf::PhysicalAggregateExprNode { aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), expr: expressions, ordering_req, - distinct: false, + distinct: a.is_distinct(), + ignore_nulls: a.ignore_nulls(), + fun_definition: 
(!buf.is_empty()).then_some(buf) }, )), }); @@ -86,11 +91,55 @@ pub fn serialize_physical_aggr_expr( expr: expressions, ordering_req, distinct, + ignore_nulls: false, + fun_definition: None, }, )), }) } +fn serialize_physical_window_aggr_expr( + aggr_expr: &dyn AggregateExpr, + window_frame: &WindowFrame, + codec: &dyn PhysicalExtensionCodec, +) -> Result<(physical_window_expr_node::WindowFunction, Option>)> { + if let Some(a) = aggr_expr.as_any().downcast_ref::() { + if a.is_distinct() || a.ignore_nulls() { + // TODO + return not_impl_err!( + "Distinct aggregate functions not supported in window expressions" + ); + } + + let mut buf = Vec::new(); + codec.try_encode_udaf(a.fun(), &mut buf)?; + Ok(( + physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( + a.fun().name().to_string(), + ), + (!buf.is_empty()).then_some(buf), + )) + } else { + let AggrFn { inner, distinct } = aggr_expr_to_aggr_fn(aggr_expr)?; + if distinct { + return not_impl_err!( + "Distinct aggregate functions not supported in window expressions" + ); + } + + if !window_frame.start_bound.is_unbounded() { + return Err(DataFusionError::Internal(format!( + "Unbounded start bound in WindowFrame = {window_frame}" + ))); + } + + Ok(( + physical_window_expr_node::WindowFunction::AggrFunction(inner as i32), + None, + )) + } +} + pub fn serialize_physical_window_expr( window_expr: Arc, codec: &dyn PhysicalExtensionCodec, @@ -99,7 +148,7 @@ pub fn serialize_physical_window_expr( let mut args = window_expr.expressions().to_vec(); let window_frame = window_expr.get_window_frame(); - let window_function = if let Some(built_in_window_expr) = + let (window_function, fun_definition) = if let Some(built_in_window_expr) = expr.downcast_ref::() { let expr = built_in_window_expr.get_built_in_func_expr(); @@ -160,58 +209,26 @@ pub fn serialize_physical_window_expr( return not_impl_err!("BuiltIn function not supported: {expr:?}"); }; - physical_window_expr_node::WindowFunction::BuiltInFunction(builtin_fn as i32) + ( + physical_window_expr_node::WindowFunction::BuiltInFunction(builtin_fn as i32), + None, + ) } else if let Some(plain_aggr_window_expr) = expr.downcast_ref::() { - let aggr_expr = plain_aggr_window_expr.get_aggregate_expr(); - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( - a.fun().name().to_string(), - ) - } else { - let AggrFn { inner, distinct } = aggr_expr_to_aggr_fn( - plain_aggr_window_expr.get_aggregate_expr().as_ref(), - )?; - - if distinct { - return not_impl_err!( - "Distinct aggregate functions not supported in window expressions" - ); - } - - if !window_frame.start_bound.is_unbounded() { - return Err(DataFusionError::Internal(format!("Invalid PlainAggregateWindowExpr = {window_expr:?} with WindowFrame = {window_frame:?}"))); - } - - physical_window_expr_node::WindowFunction::AggrFunction(inner as i32) - } + serialize_physical_window_aggr_expr( + plain_aggr_window_expr.get_aggregate_expr().as_ref(), + window_frame, + codec, + )? 
} else if let Some(sliding_aggr_window_expr) = expr.downcast_ref::() { - let aggr_expr = sliding_aggr_window_expr.get_aggregate_expr(); - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( - a.fun().name().to_string(), - ) - } else { - let AggrFn { inner, distinct } = aggr_expr_to_aggr_fn( - sliding_aggr_window_expr.get_aggregate_expr().as_ref(), - )?; - - if distinct { - // TODO - return not_impl_err!( - "Distinct aggregate functions not supported in window expressions" - ); - } - - if window_frame.start_bound.is_unbounded() { - return Err(DataFusionError::Internal(format!("Invalid SlidingAggregateWindowExpr = {window_expr:?} with WindowFrame = {window_frame:?}"))); - } - - physical_window_expr_node::WindowFunction::AggrFunction(inner as i32) - } + serialize_physical_window_aggr_expr( + sliding_aggr_window_expr.get_aggregate_expr().as_ref(), + window_frame, + codec, + )? } else { return not_impl_err!("WindowExpr not supported: {window_expr:?}"); }; @@ -232,6 +249,7 @@ pub fn serialize_physical_window_expr( window_frame: Some(window_frame), window_function: Some(window_function), name: window_expr.name().to_string(), + fun_definition, }) } @@ -461,18 +479,14 @@ pub fn serialize_physical_expr( ))), }) } else if let Some(expr) = expr.downcast_ref::() { - let args = serialize_physical_exprs(expr.args().to_vec(), codec)?; - let mut buf = Vec::new(); codec.try_encode_udf(expr.fun(), &mut buf)?; - - let fun_definition = if buf.is_empty() { None } else { Some(buf) }; Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::ScalarUdf( protobuf::PhysicalScalarUdfNode { name: expr.name().to_string(), - args, - fun_definition, + args: serialize_physical_exprs(expr.args().to_vec(), codec)?, + fun_definition: (!buf.is_empty()).then_some(buf), return_type: Some(expr.return_type().try_into()?), }, )), diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index b17289205f3de..1f837b7f42e86 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -15,6 +15,105 @@ // specific language governing permissions and limitations // under the License. 
+use std::any::Any; + +use arrow::datatypes::DataType; + +use datafusion_common::plan_err; +use datafusion_expr::function::AccumulatorArgs; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, Signature, Volatility, +}; + mod roundtrip_logical_plan; mod roundtrip_physical_plan; mod serialize; + +#[derive(Debug, PartialEq, Eq, Hash)] +struct MyRegexUdf { + signature: Signature, + // regex as original string + pattern: String, +} + +impl MyRegexUdf { + fn new(pattern: String) -> Self { + let signature = Signature::exact(vec![DataType::Utf8], Volatility::Immutable); + Self { signature, pattern } + } +} + +/// Implement the ScalarUDFImpl trait for MyRegexUdf +impl ScalarUDFImpl for MyRegexUdf { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "regex_udf" + } + fn signature(&self) -> &Signature { + &self.signature + } + fn return_type(&self, args: &[DataType]) -> datafusion_common::Result { + if matches!(args, [DataType::Utf8]) { + Ok(DataType::Int64) + } else { + plan_err!("regex_udf only accepts Utf8 arguments") + } + } + fn invoke( + &self, + _args: &[ColumnarValue], + ) -> datafusion_common::Result { + unimplemented!() + } +} + +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MyRegexUdfNode { + #[prost(string, tag = "1")] + pub pattern: String, +} + +#[derive(Debug, PartialEq, Eq, Hash)] +struct MyAggregateUDF { + signature: Signature, + result: String, +} + +impl MyAggregateUDF { + fn new(result: String) -> Self { + let signature = Signature::exact(vec![DataType::Int64], Volatility::Immutable); + Self { signature, result } + } +} + +impl AggregateUDFImpl for MyAggregateUDF { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "aggregate_udf" + } + fn signature(&self) -> &Signature { + &self.signature + } + fn return_type( + &self, + _arg_types: &[DataType], + ) -> datafusion_common::Result { + Ok(DataType::Utf8) + } + fn accumulator( + &self, + _acc_args: AccumulatorArgs, + ) -> datafusion_common::Result> { + unimplemented!() + } +} + +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MyAggregateUdfNode { + #[prost(string, tag = "1")] + pub result: String, +} diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index d0209d811b7ce..0117502f400d2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -28,15 +28,12 @@ use arrow::datatypes::{ DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode, }; +use prost::Message; + use datafusion::datasource::file_format::arrow::ArrowFormatFactory; use datafusion::datasource::file_format::csv::CsvFormatFactory; use datafusion::datasource::file_format::format_as_file_type; use datafusion::datasource::file_format::parquet::ParquetFormatFactory; -use datafusion_proto::logical_plan::file_formats::{ - ArrowLogicalExtensionCodec, CsvLogicalExtensionCodec, ParquetLogicalExtensionCodec, -}; -use prost::Message; - use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::TableProvider; use datafusion::execution::session_state::SessionStateBuilder; @@ -62,9 +59,9 @@ use datafusion_expr::expr::{ }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ - Accumulator, AggregateExt, AggregateFunction, ColumnarValue, ExprSchemable, - LogicalPlan, Operator, 
PartitionEvaluator, ScalarUDF, ScalarUDFImpl, Signature, - TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, + Accumulator, AggregateExt, AggregateFunction, AggregateUDF, ColumnarValue, + ExprSchemable, Literal, LogicalPlan, Operator, PartitionEvaluator, ScalarUDF, + Signature, TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF, WindowUDFImpl, }; use datafusion_functions_aggregate::average::avg_udaf; @@ -76,12 +73,17 @@ use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, }; +use datafusion_proto::logical_plan::file_formats::{ + ArrowLogicalExtensionCodec, CsvLogicalExtensionCodec, ParquetLogicalExtensionCodec, +}; use datafusion_proto::logical_plan::to_proto::serialize_expr; use datafusion_proto::logical_plan::{ from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec, }; use datafusion_proto::protobuf; +use crate::cases::{MyAggregateUDF, MyAggregateUdfNode, MyRegexUdf, MyRegexUdfNode}; + #[cfg(feature = "json")] fn roundtrip_json_test(proto: &protobuf::LogicalExprNode) { let string = serde_json::to_string(proto).unwrap(); @@ -744,7 +746,7 @@ pub mod proto { pub k: u64, #[prost(message, optional, tag = "2")] - pub expr: ::core::option::Option, + pub expr: Option, } #[derive(Clone, PartialEq, Eq, ::prost::Message)] @@ -752,12 +754,6 @@ pub mod proto { #[prost(uint64, tag = "1")] pub k: u64, } - - #[derive(Clone, PartialEq, ::prost::Message)] - pub struct MyRegexUdfNode { - #[prost(string, tag = "1")] - pub pattern: String, - } } #[derive(PartialEq, Eq, Hash)] @@ -890,51 +886,9 @@ impl LogicalExtensionCodec for TopKExtensionCodec { } #[derive(Debug)] -struct MyRegexUdf { - signature: Signature, - // regex as original string - pattern: String, -} - -impl MyRegexUdf { - fn new(pattern: String) -> Self { - Self { - signature: Signature::uniform( - 1, - vec![DataType::Int32], - Volatility::Immutable, - ), - pattern, - } - } -} - -/// Implement the ScalarUDFImpl trait for MyRegexUdf -impl ScalarUDFImpl for MyRegexUdf { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { - "regex_udf" - } - fn signature(&self) -> &Signature { - &self.signature - } - fn return_type(&self, args: &[DataType]) -> Result { - if !matches!(args.first(), Some(&DataType::Utf8)) { - return plan_err!("regex_udf only accepts Utf8 arguments"); - } - Ok(DataType::Int32) - } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - unimplemented!() - } -} - -#[derive(Debug)] -pub struct ScalarUDFExtensionCodec {} +pub struct UDFExtensionCodec; -impl LogicalExtensionCodec for ScalarUDFExtensionCodec { +impl LogicalExtensionCodec for UDFExtensionCodec { fn try_decode( &self, _buf: &[u8], @@ -969,13 +923,11 @@ impl LogicalExtensionCodec for ScalarUDFExtensionCodec { fn try_decode_udf(&self, name: &str, buf: &[u8]) -> Result> { if name == "regex_udf" { - let proto = proto::MyRegexUdfNode::decode(buf).map_err(|err| { - DataFusionError::Internal(format!("failed to decode regex_udf: {}", err)) + let proto = MyRegexUdfNode::decode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to decode regex_udf: {err}")) })?; - Ok(Arc::new(ScalarUDF::new_from_impl(MyRegexUdf::new( - proto.pattern, - )))) + Ok(Arc::new(ScalarUDF::from(MyRegexUdf::new(proto.pattern)))) } else { not_impl_err!("unrecognized scalar UDF implementation, cannot decode") } @@ -984,11 +936,39 @@ impl LogicalExtensionCodec for 
ScalarUDFExtensionCodec { fn try_encode_udf(&self, node: &ScalarUDF, buf: &mut Vec) -> Result<()> { let binding = node.inner(); let udf = binding.as_any().downcast_ref::().unwrap(); - let proto = proto::MyRegexUdfNode { + let proto = MyRegexUdfNode { pattern: udf.pattern.clone(), }; - proto.encode(buf).map_err(|e| { - DataFusionError::Internal(format!("failed to encode udf: {e:?}")) + proto.encode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to encode udf: {err}")) + })?; + Ok(()) + } + + fn try_decode_udaf(&self, name: &str, buf: &[u8]) -> Result> { + if name == "aggregate_udf" { + let proto = MyAggregateUdfNode::decode(buf).map_err(|err| { + DataFusionError::Internal(format!( + "failed to decode aggregate_udf: {err}" + )) + })?; + + Ok(Arc::new(AggregateUDF::from(MyAggregateUDF::new( + proto.result, + )))) + } else { + not_impl_err!("unrecognized aggregate UDF implementation, cannot decode") + } + } + + fn try_encode_udaf(&self, node: &AggregateUDF, buf: &mut Vec) -> Result<()> { + let binding = node.inner(); + let udf = binding.as_any().downcast_ref::().unwrap(); + let proto = MyAggregateUdfNode { + result: udf.result.clone(), + }; + proto.encode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to encode udf: {err}")) })?; Ok(()) } @@ -1563,8 +1543,7 @@ fn roundtrip_null_scalar_values() { for test_case in test_types.into_iter() { let proto_scalar: protobuf::ScalarValue = (&test_case).try_into().unwrap(); - let returned_scalar: datafusion::scalar::ScalarValue = - (&proto_scalar).try_into().unwrap(); + let returned_scalar: ScalarValue = (&proto_scalar).try_into().unwrap(); assert_eq!(format!("{:?}", &test_case), format!("{returned_scalar:?}")); } } @@ -1893,22 +1872,19 @@ fn roundtrip_aggregate_udf() { struct Dummy {} impl Accumulator for Dummy { - fn state(&mut self) -> datafusion::error::Result> { + fn state(&mut self) -> Result> { Ok(vec![]) } - fn update_batch( - &mut self, - _values: &[ArrayRef], - ) -> datafusion::error::Result<()> { + fn update_batch(&mut self, _values: &[ArrayRef]) -> Result<()> { Ok(()) } - fn merge_batch(&mut self, _states: &[ArrayRef]) -> datafusion::error::Result<()> { + fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> { Ok(()) } - fn evaluate(&mut self) -> datafusion::error::Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Float64(None)) } @@ -1976,25 +1952,27 @@ fn roundtrip_scalar_udf() { #[test] fn roundtrip_scalar_udf_extension_codec() { - let pattern = ".*"; - let udf = ScalarUDF::from(MyRegexUdf::new(pattern.to_string())); - let test_expr = - Expr::ScalarFunction(ScalarFunction::new_udf(Arc::new(udf.clone()), vec![])); - + let udf = ScalarUDF::from(MyRegexUdf::new(".*".to_owned())); + let test_expr = udf.call(vec!["foo".lit()]); let ctx = SessionContext::new(); - ctx.register_udf(udf); - - let extension_codec = ScalarUDFExtensionCodec {}; - let proto: protobuf::LogicalExprNode = - match serialize_expr(&test_expr, &extension_codec) { - Ok(p) => p, - Err(e) => panic!("Error serializing expression: {:?}", e), - }; - let round_trip: Expr = - from_proto::parse_expr(&proto, &ctx, &extension_codec).unwrap(); + let proto = serialize_expr(&test_expr, &UDFExtensionCodec).expect("serialize expr"); + let round_trip = + from_proto::parse_expr(&proto, &ctx, &UDFExtensionCodec).expect("parse expr"); assert_eq!(format!("{:?}", &test_expr), format!("{round_trip:?}")); + roundtrip_json_test(&proto); +} + +#[test] +fn roundtrip_aggregate_udf_extension_codec() { + let udf = 
AggregateUDF::from(MyAggregateUDF::new("DataFusion".to_owned())); + let test_expr = udf.call(vec![42.lit()]); + let ctx = SessionContext::new(); + let proto = serialize_expr(&test_expr, &UDFExtensionCodec).expect("serialize expr"); + let round_trip = + from_proto::parse_expr(&proto, &ctx, &UDFExtensionCodec).expect("parse expr"); + assert_eq!(format!("{:?}", &test_expr), format!("{round_trip:?}")); roundtrip_json_test(&proto); } @@ -2120,22 +2098,19 @@ fn roundtrip_window() { struct DummyAggr {} impl Accumulator for DummyAggr { - fn state(&mut self) -> datafusion::error::Result> { + fn state(&mut self) -> Result> { Ok(vec![]) } - fn update_batch( - &mut self, - _values: &[ArrayRef], - ) -> datafusion::error::Result<()> { + fn update_batch(&mut self, _values: &[ArrayRef]) -> Result<()> { Ok(()) } - fn merge_batch(&mut self, _states: &[ArrayRef]) -> datafusion::error::Result<()> { + fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> { Ok(()) } - fn evaluate(&mut self) -> datafusion::error::Result { + fn evaluate(&mut self) -> Result { Ok(ScalarValue::Float64(None)) } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 2fcc65008fd8f..fba6dfe425996 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::RecordBatch; use std::any::Any; use std::fmt::Display; use std::hash::Hasher; @@ -23,8 +22,8 @@ use std::ops::Deref; use std::sync::Arc; use std::vec; +use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; -use datafusion::functions_aggregate::sum::sum_udaf; use prost::Message; use datafusion::arrow::array::ArrayRef; @@ -40,9 +39,10 @@ use datafusion::datasource::physical_plan::{ FileSinkConfig, ParquetExec, }; use datafusion::execution::FunctionRegistry; +use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; use datafusion::physical_expr::aggregate::utils::down_cast_any_ref; -use datafusion::physical_expr::expressions::Max; +use datafusion::physical_expr::expressions::{Literal, Max}; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; use datafusion::physical_plan::aggregates::{ @@ -70,7 +70,7 @@ use datafusion::physical_plan::windows::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowAggExec, }; use datafusion::physical_plan::{ - udaf, AggregateExpr, ExecutionPlan, Partitioning, PhysicalExpr, Statistics, + AggregateExpr, ExecutionPlan, Partitioning, PhysicalExpr, Statistics, }; use datafusion::prelude::SessionContext; use datafusion::scalar::ScalarValue; @@ -79,10 +79,10 @@ use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; -use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{ Accumulator, AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, ScalarUDF, - ScalarUDFImpl, Signature, SimpleAggregateUDF, WindowFrame, WindowFrameBound, + Signature, SimpleAggregateUDF, WindowFrame, WindowFrameBound, }; use 
datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::nth_value::nth_value_udaf; @@ -92,6 +92,8 @@ use datafusion_proto::physical_plan::{ }; use datafusion_proto::protobuf; +use crate::cases::{MyAggregateUDF, MyAggregateUdfNode, MyRegexUdf, MyRegexUdfNode}; + /// Perform a serde roundtrip and assert that the string representation of the before and after plans /// are identical. Note that this often isn't sufficient to guarantee that no information is /// lost during serde because the string representation of a plan often only shows a subset of state. @@ -312,7 +314,7 @@ fn roundtrip_window() -> Result<()> { ); let args = vec![cast(col("a", &schema)?, &schema, DataType::Float64)?]; - let sum_expr = udaf::create_aggregate_expr( + let sum_expr = create_aggregate_expr( &sum_udaf(), &args, &[], @@ -367,7 +369,7 @@ fn rountrip_aggregate() -> Result<()> { false, )?], // NTH_VALUE - vec![udaf::create_aggregate_expr( + vec![create_aggregate_expr( &nth_value_udaf(), &[col("b", &schema)?, lit(1u64)], &[], @@ -379,7 +381,7 @@ fn rountrip_aggregate() -> Result<()> { false, )?], // STRING_AGG - vec![udaf::create_aggregate_expr( + vec![create_aggregate_expr( &AggregateUDF::new_from_impl(StringAgg::new()), &[ cast(col("b", &schema)?, &schema, DataType::Utf8)?, @@ -490,7 +492,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = vec![udaf::create_aggregate_expr( + let aggregates: Vec> = vec![create_aggregate_expr( &udaf, &[col("b", &schema)?], &[], @@ -845,123 +847,161 @@ fn roundtrip_scalar_udf() -> Result<()> { roundtrip_test_with_context(Arc::new(project), &ctx) } -#[test] -fn roundtrip_scalar_udf_extension_codec() -> Result<()> { - #[derive(Debug)] - struct MyRegexUdf { - signature: Signature, - // regex as original string - pattern: String, +#[derive(Debug)] +struct UDFExtensionCodec; + +impl PhysicalExtensionCodec for UDFExtensionCodec { + fn try_decode( + &self, + _buf: &[u8], + _inputs: &[Arc], + _registry: &dyn FunctionRegistry, + ) -> Result> { + not_impl_err!("No extension codec provided") } - impl MyRegexUdf { - fn new(pattern: String) -> Self { - Self { - signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), - pattern, - } - } + fn try_encode( + &self, + _node: Arc, + _buf: &mut Vec, + ) -> Result<()> { + not_impl_err!("No extension codec provided") } - /// Implement the ScalarUDFImpl trait for MyRegexUdf - impl ScalarUDFImpl for MyRegexUdf { - fn as_any(&self) -> &dyn Any { - self - } + fn try_decode_udf(&self, name: &str, buf: &[u8]) -> Result> { + if name == "regex_udf" { + let proto = MyRegexUdfNode::decode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to decode regex_udf: {err}")) + })?; - fn name(&self) -> &str { - "regex_udf" + Ok(Arc::new(ScalarUDF::from(MyRegexUdf::new(proto.pattern)))) + } else { + not_impl_err!("unrecognized scalar UDF implementation, cannot decode") } + } - fn signature(&self) -> &Signature { - &self.signature + fn try_encode_udf(&self, node: &ScalarUDF, buf: &mut Vec) -> Result<()> { + let binding = node.inner(); + if let Some(udf) = binding.as_any().downcast_ref::() { + let proto = MyRegexUdfNode { + pattern: udf.pattern.clone(), + }; + proto.encode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to encode udf: {err}")) + })?; } + Ok(()) + } - fn return_type(&self, args: &[DataType]) -> Result { - if !matches!(args.first(), Some(&DataType::Utf8)) { - return 
plan_err!("regex_udf only accepts Utf8 arguments"); - } - Ok(DataType::Int64) + fn try_decode_udaf(&self, name: &str, buf: &[u8]) -> Result> { + if name == "aggregate_udf" { + let proto = MyAggregateUdfNode::decode(buf).map_err(|err| { + DataFusionError::Internal(format!( + "failed to decode aggregate_udf: {err}" + )) + })?; + + Ok(Arc::new(AggregateUDF::from(MyAggregateUDF::new( + proto.result, + )))) + } else { + not_impl_err!("unrecognized scalar UDF implementation, cannot decode") } + } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - unimplemented!() + fn try_encode_udaf(&self, node: &AggregateUDF, buf: &mut Vec) -> Result<()> { + let binding = node.inner(); + if let Some(udf) = binding.as_any().downcast_ref::() { + let proto = MyAggregateUdfNode { + result: udf.result.clone(), + }; + proto.encode(buf).map_err(|err| { + DataFusionError::Internal(format!("failed to encode udf: {err:?}")) + })?; } + Ok(()) } +} - #[derive(Clone, PartialEq, ::prost::Message)] - pub struct MyRegexUdfNode { - #[prost(string, tag = "1")] - pub pattern: String, - } +#[test] +fn roundtrip_scalar_udf_extension_codec() -> Result<()> { + let field_text = Field::new("text", DataType::Utf8, true); + let field_published = Field::new("published", DataType::Boolean, false); + let field_author = Field::new("author", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_text, field_published, field_author])); + let input = Arc::new(EmptyExec::new(schema.clone())); - #[derive(Debug)] - pub struct ScalarUDFExtensionCodec {} + let udf_expr = Arc::new(ScalarFunctionExpr::new( + "regex_udf", + Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), + vec![col("text", &schema)?], + DataType::Int64, + )); - impl PhysicalExtensionCodec for ScalarUDFExtensionCodec { - fn try_decode( - &self, - _buf: &[u8], - _inputs: &[Arc], - _registry: &dyn FunctionRegistry, - ) -> Result> { - not_impl_err!("No extension codec provided") - } + let filter = Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("published", &schema)?, + Operator::And, + Arc::new(BinaryExpr::new(udf_expr.clone(), Operator::Gt, lit(0))), + )), + input, + )?); - fn try_encode( - &self, - _node: Arc, - _buf: &mut Vec, - ) -> Result<()> { - not_impl_err!("No extension codec provided") - } + let window = Arc::new(WindowAggExec::try_new( + vec![Arc::new(PlainAggregateWindowExpr::new( + Arc::new(Max::new(udf_expr.clone(), "max", DataType::Int64)), + &[col("author", &schema)?], + &[], + Arc::new(WindowFrame::new(None)), + ))], + filter, + vec![col("author", &schema)?], + )?); - fn try_decode_udf(&self, name: &str, buf: &[u8]) -> Result> { - if name == "regex_udf" { - let proto = MyRegexUdfNode::decode(buf).map_err(|err| { - DataFusionError::Internal(format!( - "failed to decode regex_udf: {}", - err - )) - })?; - - Ok(Arc::new(ScalarUDF::new_from_impl(MyRegexUdf::new( - proto.pattern, - )))) - } else { - not_impl_err!("unrecognized scalar UDF implementation, cannot decode") - } - } + let aggregate = Arc::new(AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::new(vec![], vec![], vec![]), + vec![Arc::new(Max::new(udf_expr, "max", DataType::Int64))], + vec![None], + window, + schema.clone(), + )?); - fn try_encode_udf(&self, node: &ScalarUDF, buf: &mut Vec) -> Result<()> { - let binding = node.inner(); - if let Some(udf) = binding.as_any().downcast_ref::() { - let proto = MyRegexUdfNode { - pattern: udf.pattern.clone(), - }; - proto.encode(buf).map_err(|e| { - DataFusionError::Internal(format!("failed to encode udf: 
{e:?}")) - })?; - } - Ok(()) - } - } + let ctx = SessionContext::new(); + roundtrip_test_and_return(aggregate, &ctx, &UDFExtensionCodec)?; + Ok(()) +} +#[test] +fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { let field_text = Field::new("text", DataType::Utf8, true); let field_published = Field::new("published", DataType::Boolean, false); let field_author = Field::new("author", DataType::Utf8, false); let schema = Arc::new(Schema::new(vec![field_text, field_published, field_author])); let input = Arc::new(EmptyExec::new(schema.clone())); - let pattern = ".*"; - let udf = ScalarUDF::from(MyRegexUdf::new(pattern.to_string())); let udf_expr = Arc::new(ScalarFunctionExpr::new( - udf.name(), - Arc::new(udf.clone()), + "regex_udf", + Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], DataType::Int64, )); + let udaf = AggregateUDF::from(MyAggregateUDF::new("result".to_string())); + let aggr_args: [Arc; 1] = + [Arc::new(Literal::new(ScalarValue::from(42)))]; + let aggr_expr = create_aggregate_expr( + &udaf, + &aggr_args, + &[], + &[], + &[], + &schema, + "aggregate_udf", + false, + false, + )?; + let filter = Arc::new(FilterExec::try_new( Arc::new(BinaryExpr::new( col("published", &schema)?, @@ -973,7 +1013,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { let window = Arc::new(WindowAggExec::try_new( vec![Arc::new(PlainAggregateWindowExpr::new( - Arc::new(Max::new(udf_expr.clone(), "max", DataType::Int64)), + aggr_expr, &[col("author", &schema)?], &[], Arc::new(WindowFrame::new(None)), @@ -982,18 +1022,29 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { vec![col("author", &schema)?], )?); + let aggr_expr = create_aggregate_expr( + &udaf, + &aggr_args, + &[], + &[], + &[], + &schema, + "aggregate_udf", + true, + true, + )?; + let aggregate = Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new(vec![], vec![], vec![]), - vec![Arc::new(Max::new(udf_expr, "max", DataType::Int64))], + vec![aggr_expr], vec![None], window, schema.clone(), )?); let ctx = SessionContext::new(); - let codec = ScalarUDFExtensionCodec {}; - roundtrip_test_and_return(aggregate, &ctx, &codec)?; + roundtrip_test_and_return(aggregate, &ctx, &UDFExtensionCodec)?; Ok(()) } From a979f3e5d4745edf31a489185e6dda5008e6e628 Mon Sep 17 00:00:00 2001 From: JasonLi Date: Wed, 17 Jul 2024 09:32:36 +0800 Subject: [PATCH 069/357] feat: support `unnest` in GROUP BY clause (#11469) * feat: support group by unnest * pass slt * refactor: mv process_group_by_unnest into try_process_unnest * chore: add some documentation comments and tests * Avoid cloning input * use consistent field names --------- Co-authored-by: Andrew Lamb --- datafusion/sql/src/select.rs | 118 ++++++++++++++- datafusion/sqllogictest/test_files/unnest.slt | 134 +++++++++++++++++- 2 files changed, 249 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index a5891e655a052..84b80c311245c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -26,18 +26,20 @@ use crate::utils::{ resolve_columns, resolve_positions_to_exprs, transform_bottom_unnest, }; +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; use datafusion_common::{Column, UnnestOptions}; use datafusion_expr::expr::Alias; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, }; +use 
datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::utils::{ expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ - Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, + Aggregate, Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, }; use sqlparser::ast::{ Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr, @@ -297,6 +299,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input: LogicalPlan, select_exprs: Vec, ) -> Result { + // Try process group by unnest + let input = self.try_process_aggregate_unnest(input)?; + let mut intermediate_plan = input; let mut intermediate_select_exprs = select_exprs; // Each expr in select_exprs can contains multiple unnest stage @@ -354,6 +359,117 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .build() } + fn try_process_aggregate_unnest(&self, input: LogicalPlan) -> Result { + match input { + LogicalPlan::Aggregate(agg) => { + let agg_expr = agg.aggr_expr.clone(); + let (new_input, new_group_by_exprs) = + self.try_process_group_by_unnest(agg)?; + LogicalPlanBuilder::from(new_input) + .aggregate(new_group_by_exprs, agg_expr)? + .build() + } + LogicalPlan::Filter(mut filter) => { + filter.input = Arc::new( + self.try_process_aggregate_unnest(unwrap_arc(filter.input))?, + ); + Ok(LogicalPlan::Filter(filter)) + } + _ => Ok(input), + } + } + + /// Try converting Unnest(Expr) of group by to Unnest/Projection + /// Return the new input and group_by_exprs of Aggregate. + fn try_process_group_by_unnest( + &self, + agg: Aggregate, + ) -> Result<(LogicalPlan, Vec)> { + let mut aggr_expr_using_columns: Option> = None; + + let Aggregate { + input, + group_expr, + aggr_expr, + .. + } = agg; + + // process unnest of group_by_exprs, and input of agg will be rewritten + // for example: + // + // ``` + // Aggregate: groupBy=[[UNNEST(Column(Column { relation: Some(Bare { table: "tab" }), name: "array_col" }))]], aggr=[[]] + // TableScan: tab + // ``` + // + // will be transformed into + // + // ``` + // Aggregate: groupBy=[[unnest(tab.array_col)]], aggr=[[]] + // Unnest: lists[unnest(tab.array_col)] structs[] + // Projection: tab.array_col AS unnest(tab.array_col) + // TableScan: tab + // ``` + let mut intermediate_plan = unwrap_arc(input); + let mut intermediate_select_exprs = group_expr; + + loop { + let mut unnest_columns = vec![]; + let mut inner_projection_exprs = vec![]; + + let outer_projection_exprs: Vec = intermediate_select_exprs + .iter() + .map(|expr| { + transform_bottom_unnest( + &intermediate_plan, + &mut unnest_columns, + &mut inner_projection_exprs, + expr, + ) + }) + .collect::>>()? 
+ .into_iter() + .flatten() + .collect(); + + if unnest_columns.is_empty() { + break; + } else { + let columns = unnest_columns.into_iter().map(|col| col.into()).collect(); + let unnest_options = UnnestOptions::new().with_preserve_nulls(false); + + let mut projection_exprs = match &aggr_expr_using_columns { + Some(exprs) => (*exprs).clone(), + None => { + let mut columns = HashSet::new(); + for expr in &aggr_expr { + expr.apply(|expr| { + if let Expr::Column(c) = expr { + columns.insert(Expr::Column(c.clone())); + } + Ok(TreeNodeRecursion::Continue) + }) + // As the closure always returns Ok, this "can't" error + .expect("Unexpected error"); + } + aggr_expr_using_columns = Some(columns.clone()); + columns + } + }; + projection_exprs.extend(inner_projection_exprs); + + intermediate_plan = LogicalPlanBuilder::from(intermediate_plan) + .project(projection_exprs)? + .unnest_columns_with_options(columns, unnest_options)? + .build()?; + + intermediate_select_exprs = outer_projection_exprs; + } + } + + Ok((intermediate_plan, intermediate_select_exprs)) + } + fn plan_selection( &self, selection: Option, diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 698faf87c9b20..93146541e107b 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -500,8 +500,6 @@ select unnest(column1) from (select * from (values([1,2,3]), ([4,5,6])) limit 1 query error DataFusion error: Error during planning: Projections require unique expression names but the expression "UNNEST\(Column\(Column \{ relation: Some\(Bare \{ table: "unnest_table" \}\), name: "column1" \}\)\)" at position 0 and "UNNEST\(Column\(Column \{ relation: Some\(Bare \{ table: "unnest_table" \}\), name: "column1" \}\)\)" at position 1 have the same name. Consider aliasing \("AS"\) one of them. select unnest(column1), unnest(column1) from unnest_table; -statement ok -drop table unnest_table; ## unnest list followed by unnest struct query ??? 
@@ -557,3 +555,135 @@ physical_plan 06)----------UnnestExec 07)------------ProjectionExec: expr=[column3@0 as unnest(recursive_unnest_table.column3), column3@0 as column3] 08)--------------MemoryExec: partitions=1, partition_sizes=[1] + +## group by unnest + +### without agg exprs +query I +select unnest(column1) c1 from unnest_table group by c1 order by c1; +---- +1 +2 +3 +4 +5 +6 +12 + +query II +select unnest(column1) c1, unnest(column2) c2 from unnest_table group by c1, c2 order by c1, c2; +---- +1 7 +2 NULL +3 NULL +4 8 +5 9 +6 11 +12 NULL +NULL 10 +NULL 12 +NULL 42 +NULL NULL + +query III +select unnest(column1) c1, unnest(column2) c2, column3 c3 from unnest_table group by c1, c2, c3 order by c1, c2, c3; +---- +1 7 1 +2 NULL 1 +3 NULL 1 +4 8 2 +5 9 2 +6 11 3 +12 NULL NULL +NULL 10 2 +NULL 12 3 +NULL 42 NULL +NULL NULL NULL + +### with agg exprs + +query IIII +select unnest(column1) c1, unnest(column2) c2, column3 c3, count(1) from unnest_table group by c1, c2, c3 order by c1, c2, c3; +---- +1 7 1 1 +2 NULL 1 1 +3 NULL 1 1 +4 8 2 1 +5 9 2 1 +6 11 3 1 +12 NULL NULL 1 +NULL 10 2 1 +NULL 12 3 1 +NULL 42 NULL 1 +NULL NULL NULL 1 + +query IIII +select unnest(column1) c1, unnest(column2) c2, column3 c3, count(column4) from unnest_table group by c1, c2, c3 order by c1, c2, c3; +---- +1 7 1 1 +2 NULL 1 1 +3 NULL 1 1 +4 8 2 1 +5 9 2 1 +6 11 3 0 +12 NULL NULL 0 +NULL 10 2 1 +NULL 12 3 0 +NULL 42 NULL 0 +NULL NULL NULL 0 + +query IIIII +select unnest(column1) c1, unnest(column2) c2, column3 c3, count(column4), sum(column3) from unnest_table group by c1, c2, c3 order by c1, c2, c3; +---- +1 7 1 1 1 +2 NULL 1 1 1 +3 NULL 1 1 1 +4 8 2 1 2 +5 9 2 1 2 +6 11 3 0 3 +12 NULL NULL 0 NULL +NULL 10 2 1 2 +NULL 12 3 0 3 +NULL 42 NULL 0 NULL +NULL NULL NULL 0 NULL + +query II +select unnest(column1), count(*) from unnest_table group by unnest(column1) order by unnest(column1) desc; +---- +12 1 +6 1 +5 1 +4 1 +3 1 +2 1 +1 1 + +### group by recursive unnest list + +query ? 
+select unnest(unnest(column2)) c2 from recursive_unnest_table group by c2 order by c2; +---- +[1] +[1, 1] +[2] +[3, 4] +[5] +[7, 8] +[, 6] +NULL + +query ?I +select unnest(unnest(column2)) c2, count(column3) from recursive_unnest_table group by c2 order by c2; +---- +[1] 1 +[1, 1] 1 +[2] 1 +[3, 4] 1 +[5] 1 +[7, 8] 1 +[, 6] 1 +NULL 1 + +### TODO: group by unnest struct +query error DataFusion error: Error during planning: Projection references non\-aggregate values +select unnest(column1) c1 from nested_unnest_table group by c1.c0; From d67b0fbf52a2c428399811fabac3eec6cf15da41 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Wed, 17 Jul 2024 13:34:07 +0800 Subject: [PATCH 070/357] Remove element's nullability of array_agg function (#11447) * rm null Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/core/tests/sql/aggregates.rs | 2 +- .../physical-expr/src/aggregate/array_agg.rs | 23 +++--------- .../src/aggregate/array_agg_distinct.rs | 23 +++--------- .../src/aggregate/array_agg_ordered.rs | 37 +++++-------------- .../physical-expr/src/aggregate/build_in.rs | 12 +----- .../physical-plan/src/aggregates/mod.rs | 1 - 6 files changed, 23 insertions(+), 75 deletions(-) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 86032dc9bc963..1f4f9e77d5dc5 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -36,7 +36,7 @@ async fn csv_query_array_agg_distinct() -> Result<()> { *actual[0].schema(), Schema::new(vec![Field::new_list( "ARRAY_AGG(DISTINCT aggregate_test_100.c2)", - Field::new("item", DataType::UInt32, false), + Field::new("item", DataType::UInt32, true), true ),]) ); diff --git a/datafusion/physical-expr/src/aggregate/array_agg.rs b/datafusion/physical-expr/src/aggregate/array_agg.rs index 38a9738029335..0d5ed730e2834 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg.rs @@ -24,7 +24,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Field}; use arrow_array::Array; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::array_into_list_array; +use datafusion_common::utils::array_into_list_array_nullable; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::Accumulator; @@ -40,8 +40,6 @@ pub struct ArrayAgg { input_data_type: DataType, /// The input expression expr: Arc, - /// If the input expression can have NULLs - nullable: bool, } impl ArrayAgg { @@ -50,13 +48,11 @@ impl ArrayAgg { expr: Arc, name: impl Into, data_type: DataType, - nullable: bool, ) -> Self { Self { name: name.into(), input_data_type: data_type, expr, - nullable, } } } @@ -70,7 +66,7 @@ impl AggregateExpr for ArrayAgg { Ok(Field::new_list( &self.name, // This should be the same as return type of AggregateFunction::ArrayAgg - Field::new("item", self.input_data_type.clone(), self.nullable), + Field::new("item", self.input_data_type.clone(), true), true, )) } @@ -78,14 +74,13 @@ impl AggregateExpr for ArrayAgg { fn create_accumulator(&self) -> Result> { Ok(Box::new(ArrayAggAccumulator::try_new( &self.input_data_type, - self.nullable, )?)) } fn state_fields(&self) -> Result> { Ok(vec![Field::new_list( format_state_name(&self.name, "array_agg"), - Field::new("item", self.input_data_type.clone(), self.nullable), + Field::new("item", self.input_data_type.clone(), true), true, )]) } @@ -116,16 +111,14 @@ 
impl PartialEq for ArrayAgg { pub(crate) struct ArrayAggAccumulator { values: Vec, datatype: DataType, - nullable: bool, } impl ArrayAggAccumulator { /// new array_agg accumulator based on given item data type - pub fn try_new(datatype: &DataType, nullable: bool) -> Result { + pub fn try_new(datatype: &DataType) -> Result { Ok(Self { values: vec![], datatype: datatype.clone(), - nullable, }) } } @@ -169,15 +162,11 @@ impl Accumulator for ArrayAggAccumulator { self.values.iter().map(|a| a.as_ref()).collect(); if element_arrays.is_empty() { - return Ok(ScalarValue::new_null_list( - self.datatype.clone(), - self.nullable, - 1, - )); + return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); } let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = array_into_list_array(concated_array, self.nullable); + let list_array = array_into_list_array_nullable(concated_array); Ok(ScalarValue::List(Arc::new(list_array))) } diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs index 368d11d7421ab..eca6e4ce4f656 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs @@ -42,8 +42,6 @@ pub struct DistinctArrayAgg { input_data_type: DataType, /// The input expression expr: Arc, - /// If the input expression can have NULLs - nullable: bool, } impl DistinctArrayAgg { @@ -52,14 +50,12 @@ impl DistinctArrayAgg { expr: Arc, name: impl Into, input_data_type: DataType, - nullable: bool, ) -> Self { let name = name.into(); Self { name, input_data_type, expr, - nullable, } } } @@ -74,7 +70,7 @@ impl AggregateExpr for DistinctArrayAgg { Ok(Field::new_list( &self.name, // This should be the same as return type of AggregateFunction::ArrayAgg - Field::new("item", self.input_data_type.clone(), self.nullable), + Field::new("item", self.input_data_type.clone(), true), true, )) } @@ -82,14 +78,13 @@ impl AggregateExpr for DistinctArrayAgg { fn create_accumulator(&self) -> Result> { Ok(Box::new(DistinctArrayAggAccumulator::try_new( &self.input_data_type, - self.nullable, )?)) } fn state_fields(&self) -> Result> { Ok(vec![Field::new_list( format_state_name(&self.name, "distinct_array_agg"), - Field::new("item", self.input_data_type.clone(), self.nullable), + Field::new("item", self.input_data_type.clone(), true), true, )]) } @@ -120,15 +115,13 @@ impl PartialEq for DistinctArrayAgg { struct DistinctArrayAggAccumulator { values: HashSet, datatype: DataType, - nullable: bool, } impl DistinctArrayAggAccumulator { - pub fn try_new(datatype: &DataType, nullable: bool) -> Result { + pub fn try_new(datatype: &DataType) -> Result { Ok(Self { values: HashSet::new(), datatype: datatype.clone(), - nullable, }) } } @@ -166,13 +159,9 @@ impl Accumulator for DistinctArrayAggAccumulator { fn evaluate(&mut self) -> Result { let values: Vec = self.values.iter().cloned().collect(); if values.is_empty() { - return Ok(ScalarValue::new_null_list( - self.datatype.clone(), - self.nullable, - 1, - )); + return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); } - let arr = ScalarValue::new_list(&values, &self.datatype, self.nullable); + let arr = ScalarValue::new_list(&values, &self.datatype, true); Ok(ScalarValue::List(arr)) } @@ -255,7 +244,6 @@ mod tests { col("a", &schema)?, "bla".to_string(), datatype, - true, )); let actual = aggregate(&batch, agg)?; compare_list_contents(expected, actual) @@ -272,7 +260,6 @@ mod tests { 
col("a", &schema)?, "bla".to_string(), datatype, - true, )); let mut accum1 = agg.create_accumulator()?; diff --git a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs index d44811192f667..992c06f5bf628 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs @@ -33,7 +33,7 @@ use arrow::datatypes::{DataType, Field}; use arrow_array::cast::AsArray; use arrow_array::{new_empty_array, Array, ArrayRef, StructArray}; use arrow_schema::Fields; -use datafusion_common::utils::{array_into_list_array, get_row_at_idx}; +use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx}; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::utils::AggregateOrderSensitivity; use datafusion_expr::Accumulator; @@ -50,8 +50,6 @@ pub struct OrderSensitiveArrayAgg { input_data_type: DataType, /// The input expression expr: Arc, - /// If the input expression can have `NULL`s - nullable: bool, /// Ordering data types order_by_data_types: Vec, /// Ordering requirement @@ -66,7 +64,6 @@ impl OrderSensitiveArrayAgg { expr: Arc, name: impl Into, input_data_type: DataType, - nullable: bool, order_by_data_types: Vec, ordering_req: LexOrdering, ) -> Self { @@ -74,7 +71,6 @@ impl OrderSensitiveArrayAgg { name: name.into(), input_data_type, expr, - nullable, order_by_data_types, ordering_req, reverse: false, @@ -90,8 +86,8 @@ impl AggregateExpr for OrderSensitiveArrayAgg { fn field(&self) -> Result { Ok(Field::new_list( &self.name, - // This should be the same as return type of AggregateFunction::ArrayAgg - Field::new("item", self.input_data_type.clone(), self.nullable), + // This should be the same as return type of AggregateFunction::OrderSensitiveArrayAgg + Field::new("item", self.input_data_type.clone(), true), true, )) } @@ -102,7 +98,6 @@ impl AggregateExpr for OrderSensitiveArrayAgg { &self.order_by_data_types, self.ordering_req.clone(), self.reverse, - self.nullable, ) .map(|acc| Box::new(acc) as _) } @@ -110,17 +105,13 @@ impl AggregateExpr for OrderSensitiveArrayAgg { fn state_fields(&self) -> Result> { let mut fields = vec![Field::new_list( format_state_name(&self.name, "array_agg"), - Field::new("item", self.input_data_type.clone(), self.nullable), + Field::new("item", self.input_data_type.clone(), true), true, // This should be the same as field() )]; let orderings = ordering_fields(&self.ordering_req, &self.order_by_data_types); fields.push(Field::new_list( format_state_name(&self.name, "array_agg_orderings"), - Field::new( - "item", - DataType::Struct(Fields::from(orderings)), - self.nullable, - ), + Field::new("item", DataType::Struct(Fields::from(orderings)), true), false, )); Ok(fields) @@ -147,7 +138,6 @@ impl AggregateExpr for OrderSensitiveArrayAgg { name: self.name.to_string(), input_data_type: self.input_data_type.clone(), expr: Arc::clone(&self.expr), - nullable: self.nullable, order_by_data_types: self.order_by_data_types.clone(), // Reverse requirement: ordering_req: reverse_order_bys(&self.ordering_req), @@ -186,8 +176,6 @@ pub(crate) struct OrderSensitiveArrayAggAccumulator { ordering_req: LexOrdering, /// Whether the aggregation is running in reverse. 
reverse: bool, - /// Whether the input expr is nullable - nullable: bool, } impl OrderSensitiveArrayAggAccumulator { @@ -198,7 +186,6 @@ impl OrderSensitiveArrayAggAccumulator { ordering_dtypes: &[DataType], ordering_req: LexOrdering, reverse: bool, - nullable: bool, ) -> Result { let mut datatypes = vec![datatype.clone()]; datatypes.extend(ordering_dtypes.iter().cloned()); @@ -208,7 +195,6 @@ impl OrderSensitiveArrayAggAccumulator { datatypes, ordering_req, reverse, - nullable, }) } } @@ -312,7 +298,7 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { if self.values.is_empty() { return Ok(ScalarValue::new_null_list( self.datatypes[0].clone(), - self.nullable, + true, 1, )); } @@ -322,14 +308,10 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { ScalarValue::new_list_from_iter( values.into_iter().rev(), &self.datatypes[0], - self.nullable, + true, ) } else { - ScalarValue::new_list_from_iter( - values.into_iter(), - &self.datatypes[0], - self.nullable, - ) + ScalarValue::new_list_from_iter(values.into_iter(), &self.datatypes[0], true) }; Ok(ScalarValue::List(array)) } @@ -385,9 +367,8 @@ impl OrderSensitiveArrayAggAccumulator { column_wise_ordering_values, None, )?; - Ok(ScalarValue::List(Arc::new(array_into_list_array( + Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), - self.nullable, )))) } } diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index 68c9b4859f1f8..ef21b3d0f7883 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -62,16 +62,14 @@ pub fn create_aggregate_expr( Ok(match (fun, distinct) { (AggregateFunction::ArrayAgg, false) => { let expr = Arc::clone(&input_phy_exprs[0]); - let nullable = expr.nullable(input_schema)?; if ordering_req.is_empty() { - Arc::new(expressions::ArrayAgg::new(expr, name, data_type, nullable)) + Arc::new(expressions::ArrayAgg::new(expr, name, data_type)) } else { Arc::new(expressions::OrderSensitiveArrayAgg::new( expr, name, data_type, - nullable, ordering_types, ordering_req.to_vec(), )) @@ -84,13 +82,7 @@ pub fn create_aggregate_expr( ); } let expr = Arc::clone(&input_phy_exprs[0]); - let is_expr_nullable = expr.nullable(input_schema)?; - Arc::new(expressions::DistinctArrayAgg::new( - expr, - name, - data_type, - is_expr_nullable, - )) + Arc::new(expressions::DistinctArrayAgg::new(expr, name, data_type)) } (AggregateFunction::Min, _) => Arc::new(expressions::Min::new( Arc::clone(&input_phy_exprs[0]), diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 8bf808af3b5b8..5f780f1ff8019 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -2231,7 +2231,6 @@ mod tests { Arc::clone(col_a), "array_agg", DataType::Int32, - false, vec![], order_by_expr.unwrap_or_default(), )) as _ From de0765a97b4c348c2a9667cccd5b652591c8e532 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Wed, 17 Jul 2024 18:29:58 +0800 Subject: [PATCH 071/357] Get expr planners when creating new planner (#11485) * get expr planners when creating new planner Signed-off-by: jayzhan211 * get expr planner when creating planner Signed-off-by: jayzhan211 * no planners in sqltorel Signed-off-by: jayzhan211 * Add docs about SessionContextProvider * Use Slice rather than Vec to access expr planners * add test Signed-off-by: jayzhan211 * clippy Signed-off-by: jayzhan211 --------- 
Signed-off-by: jayzhan211 Co-authored-by: Andrew Lamb --- .../core/src/execution/session_state.rs | 70 ++++++++++++++----- datafusion/expr/src/planner.rs | 5 ++ datafusion/sql/src/expr/mod.rs | 14 ++-- datafusion/sql/src/expr/substring.rs | 2 +- datafusion/sql/src/expr/value.rs | 2 +- datafusion/sql/src/planner.rs | 10 --- 6 files changed, 68 insertions(+), 35 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 75eef43454873..03ce8d3b5892a 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -516,7 +516,7 @@ impl SessionState { } } - let query = self.build_sql_query_planner(&provider); + let query = SqlToRel::new_with_options(&provider, self.get_parser_options()); query.statement_to_plan(statement) } @@ -569,7 +569,7 @@ impl SessionState { tables: HashMap::new(), }; - let query = self.build_sql_query_planner(&provider); + let query = SqlToRel::new_with_options(&provider, self.get_parser_options()); query.sql_to_expr(sql_expr, df_schema, &mut PlannerContext::new()) } @@ -854,20 +854,6 @@ impl SessionState { let udtf = self.table_functions.remove(name); Ok(udtf.map(|x| x.function().clone())) } - - fn build_sql_query_planner<'a, S>(&self, provider: &'a S) -> SqlToRel<'a, S> - where - S: ContextProvider, - { - let mut query = SqlToRel::new_with_options(provider, self.get_parser_options()); - - // custom planners are registered first, so they're run first and take precedence over built-in planners - for planner in self.expr_planners.iter() { - query = query.with_user_defined_planner(planner.clone()); - } - - query - } } /// A builder to be used for building [`SessionState`]'s. Defaults will @@ -1597,12 +1583,20 @@ impl SessionStateDefaults { } } +/// Adapter that implements the [`ContextProvider`] trait for a [`SessionState`] +/// +/// This is used so the SQL planner can access the state of the session without +/// having a direct dependency on the [`SessionState`] struct (and core crate) struct SessionContextProvider<'a> { state: &'a SessionState, tables: HashMap>, } impl<'a> ContextProvider for SessionContextProvider<'a> { + fn get_expr_planners(&self) -> &[Arc] { + &self.state.expr_planners + } + fn get_table_source( &self, name: TableReference, @@ -1898,3 +1892,47 @@ impl<'a> SimplifyInfo for SessionSimplifyProvider<'a> { expr.get_type(self.df_schema) } } + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::DFSchema; + use datafusion_common::Result; + use datafusion_expr::Expr; + use datafusion_sql::planner::{PlannerContext, SqlToRel}; + + use crate::execution::context::SessionState; + + use super::{SessionContextProvider, SessionStateBuilder}; + + #[test] + fn test_session_state_with_default_features() { + // test array planners with and without builtin planners + fn sql_to_expr(state: &SessionState) -> Result { + let provider = SessionContextProvider { + state, + tables: HashMap::new(), + }; + + let sql = "[1,2,3]"; + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let df_schema = DFSchema::try_from(schema)?; + let dialect = state.config.options().sql_parser.dialect.as_str(); + let sql_expr = state.sql_to_expr(sql, dialect)?; + + let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); + query.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) + } + + let state = 
SessionStateBuilder::new().with_default_features().build(); + + assert!(sql_to_expr(&state).is_ok()); + + // if no builtin planners exist, you should register your own, otherwise returns error + let state = SessionStateBuilder::new().build(); + + assert!(sql_to_expr(&state).is_err()) + } +} diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 2f13923b1f10a..009f3512c588e 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -60,6 +60,11 @@ pub trait ContextProvider { not_impl_err!("Recursive CTE is not implemented") } + /// Getter for expr planners + fn get_expr_planners(&self) -> &[Arc] { + &[] + } + /// Getter for a UDF description fn get_function_meta(&self, name: &str) -> Option>; /// Getter for a UDAF description diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 062ef805fd9f8..71ff7c03bea2f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -111,7 +111,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ) -> Result { // try extension planers let mut binary_expr = datafusion_expr::planner::RawBinaryExpr { op, left, right }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_binary_op(binary_expr, schema)? { PlannerResult::Planned(expr) => { return Ok(expr); @@ -184,7 +184,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(*expr, schema, planner_context)?, ]; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_extract(extract_args)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(args) => { @@ -283,7 +283,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; let mut field_access_expr = RawFieldAccessExpr { expr, field_access }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_field_access(field_access_expr, schema)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(expr) => { @@ -653,7 +653,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.create_struct_expr(values, schema, planner_context)? }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_struct_literal(create_struct_args, is_named_struct)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(args) => create_struct_args = args, @@ -673,7 +673,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?; let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?; let mut position_args = vec![fullstr, substr]; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_position(position_args)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(args) => { @@ -703,7 +703,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut raw_expr = RawDictionaryExpr { keys, values }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_dictionary_literal(raw_expr, schema)? 
{ PlannerResult::Planned(expr) => { return Ok(expr); @@ -927,7 +927,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } None => vec![arg, what_arg, from_arg], }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_overlay(overlay_args)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(args) => overlay_args = args, diff --git a/datafusion/sql/src/expr/substring.rs b/datafusion/sql/src/expr/substring.rs index a0dfee1b9d907..f58ab5ff3612c 100644 --- a/datafusion/sql/src/expr/substring.rs +++ b/datafusion/sql/src/expr/substring.rs @@ -68,7 +68,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_substring(substring_args)? { PlannerResult::Planned(expr) => return Ok(expr), PlannerResult::Original(args) => { diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 5cd6ffc687888..1564f06fe4b9a 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -154,7 +154,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, ) -> Result { let mut exprs = values; - for planner in self.planners.iter() { + for planner in self.context_provider.get_expr_planners() { match planner.plan_array_literal(exprs, schema)? { PlannerResult::Planned(expr) => { return Ok(expr); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index be04f51f4f2c9..901a2ad38d8cc 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,7 +24,6 @@ use arrow_schema::*; use datafusion_common::{ field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError, }; -use datafusion_expr::planner::ExprPlanner; use sqlparser::ast::TimezoneInfo; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; @@ -186,8 +185,6 @@ pub struct SqlToRel<'a, S: ContextProvider> { pub(crate) context_provider: &'a S, pub(crate) options: ParserOptions, pub(crate) normalizer: IdentNormalizer, - /// user defined planner extensions - pub(crate) planners: Vec>, } impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -196,12 +193,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Self::new_with_options(context_provider, ParserOptions::default()) } - /// add an user defined planner - pub fn with_user_defined_planner(mut self, planner: Arc) -> Self { - self.planners.push(planner); - self - } - /// Create a new query planner pub fn new_with_options(context_provider: &'a S, options: ParserOptions) -> Self { let normalize = options.enable_ident_normalization; @@ -210,7 +201,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { context_provider, options, normalizer: IdentNormalizer::new(normalize), - planners: vec![], } } From b0925c801e1c07bd78c78b045ab58fbd0630b638 Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Wed, 17 Jul 2024 04:21:34 -0700 Subject: [PATCH 072/357] Support alternate format for Utf8 unparsing (CHAR) (#11494) * Add dialect param to use CHAR instead of TEXT for Utf8 unparsing for MySQL (#12) * Configurable data type instead of flag for Utf8 unparsing * Fix type in comment --- datafusion/sql/src/unparser/dialect.rs | 52 +++++++++++++++++++++++++- datafusion/sql/src/unparser/expr.rs | 34 ++++++++++++++++- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs 
index eca2eb4fd0ec7..87453f81ee3d8 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -16,7 +16,7 @@ // under the License. use regex::Regex; -use sqlparser::keywords::ALL_KEYWORDS; +use sqlparser::{ast, keywords::ALL_KEYWORDS}; /// `Dialect` to use for Unparsing /// @@ -45,6 +45,17 @@ pub trait Dialect { fn interval_style(&self) -> IntervalStyle { IntervalStyle::PostgresVerbose } + + // The SQL type to use for Arrow Utf8 unparsing + // Most dialects use VARCHAR, but some, like MySQL, require CHAR + fn utf8_cast_dtype(&self) -> ast::DataType { + ast::DataType::Varchar(None) + } + // The SQL type to use for Arrow LargeUtf8 unparsing + // Most dialects use TEXT, but some, like MySQL, require CHAR + fn large_utf8_cast_dtype(&self) -> ast::DataType { + ast::DataType::Text + } } /// `IntervalStyle` to use for unparsing @@ -103,6 +114,14 @@ impl Dialect for MySqlDialect { fn interval_style(&self) -> IntervalStyle { IntervalStyle::MySQL } + + fn utf8_cast_dtype(&self) -> ast::DataType { + ast::DataType::Char(None) + } + + fn large_utf8_cast_dtype(&self) -> ast::DataType { + ast::DataType::Char(None) + } } pub struct SqliteDialect {} @@ -118,6 +137,8 @@ pub struct CustomDialect { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + utf8_cast_dtype: ast::DataType, + large_utf8_cast_dtype: ast::DataType, } impl Default for CustomDialect { @@ -127,6 +148,8 @@ impl Default for CustomDialect { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::SQLStandard, + utf8_cast_dtype: ast::DataType::Varchar(None), + large_utf8_cast_dtype: ast::DataType::Text, } } } @@ -158,6 +181,14 @@ impl Dialect for CustomDialect { fn interval_style(&self) -> IntervalStyle { self.interval_style } + + fn utf8_cast_dtype(&self) -> ast::DataType { + self.utf8_cast_dtype.clone() + } + + fn large_utf8_cast_dtype(&self) -> ast::DataType { + self.large_utf8_cast_dtype.clone() + } } /// `CustomDialectBuilder` to build `CustomDialect` using builder pattern @@ -179,6 +210,8 @@ pub struct CustomDialectBuilder { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + utf8_cast_dtype: ast::DataType, + large_utf8_cast_dtype: ast::DataType, } impl Default for CustomDialectBuilder { @@ -194,6 +227,8 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::PostgresVerbose, + utf8_cast_dtype: ast::DataType::Varchar(None), + large_utf8_cast_dtype: ast::DataType::Text, } } @@ -203,6 +238,8 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, use_timestamp_for_date64: self.use_timestamp_for_date64, interval_style: self.interval_style, + utf8_cast_dtype: self.utf8_cast_dtype, + large_utf8_cast_dtype: self.large_utf8_cast_dtype, } } @@ -235,4 +272,17 @@ impl CustomDialectBuilder { self.interval_style = interval_style; self } + + pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { + self.utf8_cast_dtype = utf8_cast_dtype; + self + } + + pub fn with_large_utf8_cast_dtype( + mut self, + large_utf8_cast_dtype: ast::DataType, + ) -> Self { + self.large_utf8_cast_dtype = large_utf8_cast_dtype; + self + } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index e6b67b5d9fb2d..950e7e11288a7 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1275,8 
+1275,8 @@ impl Unparser<'_> { DataType::BinaryView => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } - DataType::Utf8 => Ok(ast::DataType::Varchar(None)), - DataType::LargeUtf8 => Ok(ast::DataType::Text), + DataType::Utf8 => Ok(self.dialect.utf8_cast_dtype()), + DataType::LargeUtf8 => Ok(self.dialect.large_utf8_cast_dtype()), DataType::Utf8View => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } @@ -1936,4 +1936,34 @@ mod tests { assert_eq!(actual, expected); } } + + #[test] + fn custom_dialect_use_char_for_utf8_cast() -> Result<()> { + let default_dialect = CustomDialectBuilder::default().build(); + let mysql_custom_dialect = CustomDialectBuilder::new() + .with_utf8_cast_dtype(ast::DataType::Char(None)) + .with_large_utf8_cast_dtype(ast::DataType::Char(None)) + .build(); + + for (dialect, data_type, identifier) in [ + (&default_dialect, DataType::Utf8, "VARCHAR"), + (&default_dialect, DataType::LargeUtf8, "TEXT"), + (&mysql_custom_dialect, DataType::Utf8, "CHAR"), + (&mysql_custom_dialect, DataType::LargeUtf8, "CHAR"), + ] { + let unparser = Unparser::new(dialect); + + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + let expected = format!(r#"CAST(a AS {identifier})"#); + + assert_eq!(actual, expected); + } + Ok(()) + } } From fb34ef2ccab5fd20bf24eb1e5bce5a30c1e2d234 Mon Sep 17 00:00:00 2001 From: Drew Hayward Date: Wed, 17 Jul 2024 13:48:54 -0400 Subject: [PATCH 073/357] implement retract_batch for xor accumulator (#11500) * implement retract_batch for xor accumulator * add comment --- .../functions-aggregate/src/bit_and_or_xor.rs | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs index 9224b06e407ac..6c2d6cb5285c1 100644 --- a/datafusion/functions-aggregate/src/bit_and_or_xor.rs +++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs @@ -358,6 +358,15 @@ where Ok(()) } + fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + // XOR is it's own inverse + self.update_batch(values) + } + + fn supports_retract_batch(&self) -> bool { + true + } + fn evaluate(&mut self) -> Result { ScalarValue::new_primitive::(self.value, &T::DATA_TYPE) } @@ -456,3 +465,41 @@ where Ok(()) } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::{ArrayRef, UInt64Array}; + use arrow::datatypes::UInt64Type; + use datafusion_common::ScalarValue; + + use crate::bit_and_or_xor::BitXorAccumulator; + use datafusion_expr::Accumulator; + + #[test] + fn test_bit_xor_accumulator() { + let mut accumulator = BitXorAccumulator:: { value: None }; + let batches: Vec<_> = vec![vec![1, 2], vec![1]] + .into_iter() + .map(|b| Arc::new(b.into_iter().collect::()) as ArrayRef) + .collect(); + + let added = &[Arc::clone(&batches[0])]; + let retracted = &[Arc::clone(&batches[1])]; + + // XOR of 1..3 is 3 + accumulator.update_batch(added).unwrap(); + assert_eq!( + accumulator.evaluate().unwrap(), + ScalarValue::UInt64(Some(3)) + ); + + // Removing [1] ^ 3 = 2 + accumulator.retract_batch(retracted).unwrap(); + assert_eq!( + accumulator.evaluate().unwrap(), + ScalarValue::UInt64(Some(2)) + ); + } +} From c95556def34bab6a7b4a74467eb08a92f67426af Mon Sep 17 00:00:00 2001 From: wiedld Date: Wed, 17 Jul 2024 12:08:48 -0700 Subject: [PATCH 074/357] Refactor: more clearly delineate between `TableParquetOptions` and 
`ParquetWriterOptions` (#11444) * refactor: make more explicit the relationship btwn TableParquetOptions vs ParquetOptions vs WriterProperties * test: demonstrate the relationship btwn session configs and writer props * refactor: move parquet-format specific functionality to the parquet submodule, leaving only the config options in the config module. * test: update test fixtures to use the ParquetOptions::default * test: update test helper session_config_from_writer_props, to not add column configuration when none exists * test(11367): write test to demonstrate issue 11367 * fix: existing sqllogictests require specific ParquetOptions settings to be left as None * test(11367): demonstrate how the require bloom filter defaults, (required to avoid test regression), result in different default behavior than parquet crate * chore: make more reviewable, by pulling tests for issue 11367 into followup PR * refactor: move all parquet-associated features into parquet-writer mod * chore: better function naming convention --- .../common/src/file_options/parquet_writer.rs | 409 ++++++++++++++---- 1 file changed, 320 insertions(+), 89 deletions(-) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 009164a29e348..a14cbdecf6011 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -24,10 +24,11 @@ use crate::{ use parquet::{ basic::{BrotliLevel, GzipLevel, ZstdLevel}, - file::{ - metadata::KeyValue, - properties::{EnabledStatistics, WriterProperties, WriterVersion}, + file::properties::{ + EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion, + DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED, }, + format::KeyValue, schema::types::ColumnPath, }; @@ -52,92 +53,43 @@ impl ParquetWriterOptions { impl TryFrom<&TableParquetOptions> for ParquetWriterOptions { type Error = DataFusionError; - fn try_from(parquet_options: &TableParquetOptions) -> Result { - let ParquetOptions { - data_pagesize_limit, - write_batch_size, - writer_version, - dictionary_page_size_limit, - max_row_group_size, - created_by, - column_index_truncate_length, - data_page_row_count_limit, - bloom_filter_on_write, - encoding, - dictionary_enabled, - compression, - statistics_enabled, - max_statistics_size, - bloom_filter_fpp, - bloom_filter_ndv, - // below is not part of ParquetWriterOptions - enable_page_index: _, - pruning: _, - skip_metadata: _, - metadata_size_hint: _, - pushdown_filters: _, - reorder_filters: _, - allow_single_file_parallelism: _, - maximum_parallel_row_group_writers: _, - maximum_buffered_record_batches_per_stream: _, - bloom_filter_on_read: _, - } = &parquet_options.global; - - let key_value_metadata = if !parquet_options.key_value_metadata.is_empty() { - Some( - parquet_options - .key_value_metadata - .clone() - .drain() - .map(|(key, value)| KeyValue { key, value }) - .collect::>(), - ) - } else { - None - }; - - let mut builder = WriterProperties::builder() - .set_data_page_size_limit(*data_pagesize_limit) - .set_write_batch_size(*write_batch_size) - .set_writer_version(parse_version_string(writer_version.as_str())?) 
- .set_dictionary_page_size_limit(*dictionary_page_size_limit) - .set_max_row_group_size(*max_row_group_size) - .set_created_by(created_by.clone()) - .set_column_index_truncate_length(*column_index_truncate_length) - .set_data_page_row_count_limit(*data_page_row_count_limit) - .set_bloom_filter_enabled(*bloom_filter_on_write) - .set_key_value_metadata(key_value_metadata); - - if let Some(encoding) = &encoding { - builder = builder.set_encoding(parse_encoding_string(encoding)?); - } - - if let Some(enabled) = dictionary_enabled { - builder = builder.set_dictionary_enabled(*enabled); - } - - if let Some(compression) = &compression { - builder = builder.set_compression(parse_compression_string(compression)?); - } - - if let Some(statistics) = &statistics_enabled { - builder = - builder.set_statistics_enabled(parse_statistics_string(statistics)?); - } - - if let Some(size) = max_statistics_size { - builder = builder.set_max_statistics_size(*size); - } + fn try_from(parquet_table_options: &TableParquetOptions) -> Result { + // ParquetWriterOptions will have defaults for the remaining fields (e.g. sorting_columns) + Ok(ParquetWriterOptions { + writer_options: WriterPropertiesBuilder::try_from(parquet_table_options)? + .build(), + }) + } +} - if let Some(fpp) = bloom_filter_fpp { - builder = builder.set_bloom_filter_fpp(*fpp); - } +impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { + type Error = DataFusionError; - if let Some(ndv) = bloom_filter_ndv { - builder = builder.set_bloom_filter_ndv(*ndv); + /// Convert the session's [`TableParquetOptions`] into a single write action's [`WriterPropertiesBuilder`]. + /// + /// The returned [`WriterPropertiesBuilder`] includes customizations applicable per column. + fn try_from(table_parquet_options: &TableParquetOptions) -> Result { + // Table options include kv_metadata and col-specific options + let TableParquetOptions { + global, + column_specific_options, + key_value_metadata, + } = table_parquet_options; + + let mut builder = global.into_writer_properties_builder()?; + + if !key_value_metadata.is_empty() { + builder = builder.set_key_value_metadata(Some( + key_value_metadata + .to_owned() + .drain() + .map(|(key, value)| KeyValue { key, value }) + .collect(), + )); } - for (column, options) in &parquet_options.column_specific_options { + // Apply column-specific options: + for (column, options) in column_specific_options { let path = ColumnPath::new(column.split('.').map(|s| s.to_owned()).collect()); if let Some(bloom_filter_enabled) = options.bloom_filter_enabled { @@ -183,10 +135,87 @@ impl TryFrom<&TableParquetOptions> for ParquetWriterOptions { } } - // ParquetWriterOptions will have defaults for the remaining fields (e.g. sorting_columns) - Ok(ParquetWriterOptions { - writer_options: builder.build(), - }) + Ok(builder) + } +} + +impl ParquetOptions { + /// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`]. + /// + /// The returned [`WriterPropertiesBuilder`] can then be further modified with additional options + /// applied per column; a customization which is not applicable for [`ParquetOptions`]. 
+ pub fn into_writer_properties_builder(&self) -> Result { + let ParquetOptions { + data_pagesize_limit, + write_batch_size, + writer_version, + compression, + dictionary_enabled, + dictionary_page_size_limit, + statistics_enabled, + max_statistics_size, + max_row_group_size, + created_by, + column_index_truncate_length, + data_page_row_count_limit, + encoding, + bloom_filter_on_write, + bloom_filter_fpp, + bloom_filter_ndv, + + // not in WriterProperties + enable_page_index: _, + pruning: _, + skip_metadata: _, + metadata_size_hint: _, + pushdown_filters: _, + reorder_filters: _, + allow_single_file_parallelism: _, + maximum_parallel_row_group_writers: _, + maximum_buffered_record_batches_per_stream: _, + bloom_filter_on_read: _, // reads not used for writer props + } = self; + + let mut builder = WriterProperties::builder() + .set_data_page_size_limit(*data_pagesize_limit) + .set_write_batch_size(*write_batch_size) + .set_writer_version(parse_version_string(writer_version.as_str())?) + .set_dictionary_page_size_limit(*dictionary_page_size_limit) + .set_statistics_enabled( + statistics_enabled + .as_ref() + .and_then(|s| parse_statistics_string(s).ok()) + .unwrap_or(DEFAULT_STATISTICS_ENABLED), + ) + .set_max_statistics_size( + max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE), + ) + .set_max_row_group_size(*max_row_group_size) + .set_created_by(created_by.clone()) + .set_column_index_truncate_length(*column_index_truncate_length) + .set_data_page_row_count_limit(*data_page_row_count_limit) + .set_bloom_filter_enabled(*bloom_filter_on_write); + + if let Some(bloom_filter_fpp) = bloom_filter_fpp { + builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp); + }; + if let Some(bloom_filter_ndv) = bloom_filter_ndv { + builder = builder.set_bloom_filter_ndv(*bloom_filter_ndv); + }; + if let Some(dictionary_enabled) = dictionary_enabled { + builder = builder.set_dictionary_enabled(*dictionary_enabled); + }; + + // We do not have access to default ColumnProperties set in Arrow. + // Therefore, only overwrite if these settings exist. 
+ if let Some(compression) = compression { + builder = builder.set_compression(parse_compression_string(compression)?); + } + if let Some(encoding) = encoding { + builder = builder.set_encoding(parse_encoding_string(encoding)?); + } + + Ok(builder) } } @@ -336,3 +365,205 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result ColumnOptions { + ColumnOptions { + compression: Some("zstd(22)".into()), + dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), + statistics_enabled: Some("page".into()), + max_statistics_size: Some(72), + encoding: Some("RLE".into()), + bloom_filter_enabled: Some(true), + bloom_filter_fpp: Some(0.72), + bloom_filter_ndv: Some(72), + } + } + + fn parquet_options_with_non_defaults() -> ParquetOptions { + let defaults = ParquetOptions::default(); + let writer_version = if defaults.writer_version.eq("1.0") { + "2.0" + } else { + "1.0" + }; + + ParquetOptions { + data_pagesize_limit: 42, + write_batch_size: 42, + writer_version: writer_version.into(), + compression: Some("zstd(22)".into()), + dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)), + dictionary_page_size_limit: 42, + statistics_enabled: Some("chunk".into()), + max_statistics_size: Some(42), + max_row_group_size: 42, + created_by: "wordy".into(), + column_index_truncate_length: Some(42), + data_page_row_count_limit: 42, + encoding: Some("BYTE_STREAM_SPLIT".into()), + bloom_filter_on_write: !defaults.bloom_filter_on_write, + bloom_filter_fpp: Some(0.42), + bloom_filter_ndv: Some(42), + + // not in WriterProperties, but itemizing here to not skip newly added props + enable_page_index: defaults.enable_page_index, + pruning: defaults.pruning, + skip_metadata: defaults.skip_metadata, + metadata_size_hint: defaults.metadata_size_hint, + pushdown_filters: defaults.pushdown_filters, + reorder_filters: defaults.reorder_filters, + allow_single_file_parallelism: defaults.allow_single_file_parallelism, + maximum_parallel_row_group_writers: defaults + .maximum_parallel_row_group_writers, + maximum_buffered_record_batches_per_stream: defaults + .maximum_buffered_record_batches_per_stream, + bloom_filter_on_read: defaults.bloom_filter_on_read, + } + } + + fn extract_column_options( + props: &WriterProperties, + col: ColumnPath, + ) -> ColumnOptions { + let bloom_filter_default_props = props.bloom_filter_properties(&col); + + ColumnOptions { + bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), + encoding: props.encoding(&col).map(|s| s.to_string()), + dictionary_enabled: Some(props.dictionary_enabled(&col)), + compression: match props.compression(&col) { + Compression::ZSTD(lvl) => { + Some(format!("zstd({})", lvl.compression_level())) + } + _ => None, + }, + statistics_enabled: Some( + match props.statistics_enabled(&col) { + EnabledStatistics::None => "none", + EnabledStatistics::Chunk => "chunk", + EnabledStatistics::Page => "page", + } + .into(), + ), + bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp), + bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv), + max_statistics_size: Some(props.max_statistics_size(&col)), + } + } + + /// For testing only, take a single write's props and convert back into the session config. + /// (use identity to confirm correct.) 
+ fn session_config_from_writer_props(props: &WriterProperties) -> TableParquetOptions { + let default_col = ColumnPath::from("col doesn't have specific config"); + let default_col_props = extract_column_options(props, default_col); + + let configured_col = ColumnPath::from(COL_NAME); + let configured_col_props = extract_column_options(props, configured_col); + + let key_value_metadata = props + .key_value_metadata() + .map(|pairs| { + HashMap::from_iter( + pairs + .iter() + .cloned() + .map(|KeyValue { key, value }| (key, value)), + ) + }) + .unwrap_or_default(); + + let global_options_defaults = ParquetOptions::default(); + + let column_specific_options = if configured_col_props.eq(&default_col_props) { + HashMap::default() + } else { + HashMap::from([(COL_NAME.into(), configured_col_props)]) + }; + + TableParquetOptions { + global: ParquetOptions { + // global options + data_pagesize_limit: props.dictionary_page_size_limit(), + write_batch_size: props.write_batch_size(), + writer_version: format!("{}.0", props.writer_version().as_num()), + dictionary_page_size_limit: props.dictionary_page_size_limit(), + max_row_group_size: props.max_row_group_size(), + created_by: props.created_by().to_string(), + column_index_truncate_length: props.column_index_truncate_length(), + data_page_row_count_limit: props.data_page_row_count_limit(), + + // global options which set the default column props + encoding: default_col_props.encoding, + compression: default_col_props.compression, + dictionary_enabled: default_col_props.dictionary_enabled, + statistics_enabled: default_col_props.statistics_enabled, + max_statistics_size: default_col_props.max_statistics_size, + bloom_filter_on_write: default_col_props + .bloom_filter_enabled + .unwrap_or_default(), + bloom_filter_fpp: default_col_props.bloom_filter_fpp, + bloom_filter_ndv: default_col_props.bloom_filter_ndv, + + // not in WriterProperties + enable_page_index: global_options_defaults.enable_page_index, + pruning: global_options_defaults.pruning, + skip_metadata: global_options_defaults.skip_metadata, + metadata_size_hint: global_options_defaults.metadata_size_hint, + pushdown_filters: global_options_defaults.pushdown_filters, + reorder_filters: global_options_defaults.reorder_filters, + allow_single_file_parallelism: global_options_defaults + .allow_single_file_parallelism, + maximum_parallel_row_group_writers: global_options_defaults + .maximum_parallel_row_group_writers, + maximum_buffered_record_batches_per_stream: global_options_defaults + .maximum_buffered_record_batches_per_stream, + bloom_filter_on_read: global_options_defaults.bloom_filter_on_read, + }, + column_specific_options, + key_value_metadata, + } + } + + #[test] + fn table_parquet_opts_to_writer_props() { + // ParquetOptions, all props set to non-default + let parquet_options = parquet_options_with_non_defaults(); + + // TableParquetOptions, using ParquetOptions for global settings + let key = "foo".to_string(); + let value = Some("bar".into()); + let table_parquet_opts = TableParquetOptions { + global: parquet_options.clone(), + column_specific_options: [( + COL_NAME.into(), + column_options_with_non_defaults(&parquet_options), + )] + .into(), + key_value_metadata: [(key.clone(), value.clone())].into(), + }; + + let writer_props = WriterPropertiesBuilder::try_from(&table_parquet_opts) + .unwrap() + .build(); + assert_eq!( + table_parquet_opts, + session_config_from_writer_props(&writer_props), + "the writer_props should have the same configuration as the session's 
TableParquetOptions", + ); + } +} From 0021356fb6c68f4b7cba3802f941b8fa26676c1a Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Wed, 17 Jul 2024 12:10:49 -0700 Subject: [PATCH 075/357] feat: support `COUNT()` (#11229) * feat: add count empty rewrite * feat: make count support zero args * docs: add apache license * tests: make count() valid * tests: more tests * refactor: sketch `AggregateFunctionPlanner` * refactor: cleanup `AggregateFunctionPlanner` * feat: add back rule * Revert "feat: add back rule" This reverts commit 2c4fc0a695790936666e752101659531281b784b. * Revert "refactor: cleanup `AggregateFunctionPlanner`" This reverts commit 4550dbd8eb596947fa14e62c69c67255b1b36ba5. * Revert "refactor: sketch `AggregateFunctionPlanner`" This reverts commit 658671ead948a6c78416a53e672f3faf08f9c6bb. * Apply suggestions from code review Co-authored-by: Andrew Lamb * refactor: PR feedback * style: fix indent --------- Co-authored-by: Andrew Lamb --- datafusion/functions-aggregate/src/count.rs | 8 +- .../src/analyzer/count_wildcard_rule.rs | 6 +- .../test_files/count_star_rule.slt | 105 ++++++++++++++++++ datafusion/sqllogictest/test_files/errors.slt | 4 - 4 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/count_star_rule.slt diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 0a667d35dce5e..0ead22e90a163 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -44,7 +44,7 @@ use datafusion_expr::{ function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, Volatility, }; -use datafusion_expr::{Expr, ReversedUDAF}; +use datafusion_expr::{Expr, ReversedUDAF, TypeSignature}; use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::accumulate_indices; use datafusion_physical_expr_common::{ aggregate::count_distinct::{ @@ -95,7 +95,11 @@ impl Default for Count { impl Count { pub fn new() -> Self { Self { - signature: Signature::variadic_any(Volatility::Immutable), + signature: Signature::one_of( + // TypeSignature::Any(0) is required to handle `Count()` with no args + vec![TypeSignature::VariadicAny, TypeSignature::Any(0)], + Volatility::Immutable, + ), } } } diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 959ffdaaa2129..fa8aeb86ed31e 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -35,7 +35,7 @@ pub struct CountWildcardRule {} impl CountWildcardRule { pub fn new() -> Self { - CountWildcardRule {} + Self {} } } @@ -59,14 +59,14 @@ fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool { func_def: AggregateFunctionDefinition::UDF(udf), args, .. 
- } if udf.name() == "count" && args.len() == 1 && is_wildcard(&args[0])) + } if udf.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty())) } fn is_count_star_window_aggregate(window_function: &WindowFunction) -> bool { let args = &window_function.args; matches!(window_function.fun, WindowFunctionDefinition::AggregateUDF(ref udaf) - if udaf.name() == "count" && args.len() == 1 && is_wildcard(&args[0])) + if udaf.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty())) } fn analyze_internal(plan: LogicalPlan) -> Result> { diff --git a/datafusion/sqllogictest/test_files/count_star_rule.slt b/datafusion/sqllogictest/test_files/count_star_rule.slt new file mode 100644 index 0000000000000..99d358ad17f02 --- /dev/null +++ b/datafusion/sqllogictest/test_files/count_star_rule.slt @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +statement ok +CREATE TABLE t1 (a INTEGER, b INTEGER, c INTEGER); + +statement ok +INSERT INTO t1 VALUES +(1, 2, 3), +(1, 5, 6), +(2, 3, 5); + +statement ok +CREATE TABLE t2 (a INTEGER, b INTEGER, c INTEGER); + +query TT +EXPLAIN SELECT COUNT() FROM (SELECT 1 AS a, 2 AS b) AS t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count()]] +02)--SubqueryAlias: t +03)----EmptyRelation +physical_plan +01)ProjectionExec: expr=[1 as count()] +02)--PlaceholderRowExec + +query TT +EXPLAIN SELECT t1.a, COUNT() FROM t1 GROUP BY t1.a; +---- +logical_plan +01)Aggregate: groupBy=[[t1.a]], aggr=[[count(Int64(1)) AS count()]] +02)--TableScan: t1 projection=[a] +physical_plan +01)AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count()] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count()] +06)----------MemoryExec: partitions=1, partition_sizes=[1] + +query TT +EXPLAIN SELECT t1.a, COUNT() AS cnt FROM t1 GROUP BY t1.a HAVING COUNT() > 0; +---- +logical_plan +01)Projection: t1.a, count() AS cnt +02)--Filter: count() > Int64(0) +03)----Aggregate: groupBy=[[t1.a]], aggr=[[count(Int64(1)) AS count()]] +04)------TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[a@0 as a, count()@1 as cnt] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: count()@1 > 0 +04)------AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count()] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------AggregateExec: mode=Partial, gby=[a@0 
as a], aggr=[count()] +09)----------------MemoryExec: partitions=1, partition_sizes=[1] + +query II +SELECT t1.a, COUNT() AS cnt FROM t1 GROUP BY t1.a HAVING COUNT() > 1; +---- +1 2 + +query TT +EXPLAIN SELECT a, COUNT() OVER (PARTITION BY a) AS count_a FROM t1; +---- +logical_plan +01)Projection: t1.a, count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count_a +02)--WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +03)----TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[a@0 as a, count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as count_a] +02)--WindowAggExec: wdw=[count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 +06)----------MemoryExec: partitions=1, partition_sizes=[1] + +query II +SELECT a, COUNT() OVER (PARTITION BY a) AS count_a FROM t1 ORDER BY a; +---- +1 2 +1 2 +2 1 + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index fa25f00974a9b..be7fdac71b57d 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -103,10 +103,6 @@ SELECT power(1, 2, 3); # Wrong window/aggregate function signature # -# AggregateFunction with wrong number of arguments -query error -select count(); - # AggregateFunction with wrong number of arguments query error select avg(c1, c12) from aggregate_test_100; From 81a6094753b7c02fff2706ca0e7112c198bbd102 Mon Sep 17 00:00:00 2001 From: JasonLi Date: Thu, 18 Jul 2024 03:11:25 +0800 Subject: [PATCH 076/357] chore: fix typos of common and core packages (#11520) --- datafusion/common/src/column.rs | 4 ++-- datafusion/common/src/dfschema.rs | 2 +- datafusion/common/src/functional_dependencies.rs | 2 +- datafusion/common/src/table_reference.rs | 2 +- .../core/src/catalog/information_schema.rs | 4 ++-- .../core/src/datasource/file_format/arrow.rs | 4 ++-- .../core/src/datasource/file_format/csv.rs | 2 +- .../core/src/datasource/file_format/parquet.rs | 12 ++++++------ .../src/datasource/file_format/write/demux.rs | 2 +- .../file_format/write/orchestration.rs | 6 +++--- .../core/src/datasource/listing/helpers.rs | 2 +- datafusion/core/src/datasource/listing/table.rs | 4 ++-- datafusion/core/src/datasource/listing/url.rs | 2 +- datafusion/core/src/datasource/memory.rs | 2 +- .../datasource/physical_plan/file_scan_config.rs | 4 ++-- .../physical_plan/parquet/row_group_filter.rs | 2 +- .../physical_plan/parquet/statistics.rs | 16 ++++++++-------- datafusion/core/src/datasource/schema_adapter.rs | 2 +- datafusion/core/src/datasource/streaming.rs | 2 +- datafusion/core/src/execution/context/mod.rs | 4 ++-- datafusion/core/src/execution/session_state.rs | 2 +- .../combine_partial_final_agg.rs | 4 
++-- .../physical_optimizer/enforce_distribution.rs | 4 ++-- .../src/physical_optimizer/join_selection.rs | 8 ++++---- .../physical_optimizer/projection_pushdown.rs | 2 +- .../core/src/physical_optimizer/pruning.rs | 2 +- .../src/physical_optimizer/update_aggr_exprs.rs | 2 +- datafusion/core/tests/dataframe/mod.rs | 2 +- datafusion/core/tests/memory_limit/mod.rs | 4 ++-- .../core/tests/parquet/arrow_statistics.rs | 6 +++--- datafusion/core/tests/parquet/schema.rs | 4 ++-- datafusion/core/tests/sql/explain_analyze.rs | 2 +- .../user_defined_scalar_functions.rs | 8 ++++---- 33 files changed, 65 insertions(+), 65 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index e36a4f8906446..2e2bfff403403 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -214,7 +214,7 @@ impl Column { for using_col in using_columns { let all_matched = columns.iter().all(|f| using_col.contains(f)); // All matched fields belong to the same using column set, in orther words - // the same join clause. We simply pick the qualifer from the first match. + // the same join clause. We simply pick the qualifier from the first match. if all_matched { return Ok(columns[0].clone()); } @@ -303,7 +303,7 @@ impl Column { for using_col in using_columns { let all_matched = columns.iter().all(|c| using_col.contains(c)); // All matched fields belong to the same using column set, in orther words - // the same join clause. We simply pick the qualifer from the first match. + // the same join clause. We simply pick the qualifier from the first match. if all_matched { return Ok(columns[0].clone()); } diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 7598cbc4d86a0..eef0a03df700b 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1310,7 +1310,7 @@ mod tests { ]) } #[test] - fn test_dfschema_to_schema_convertion() { + fn test_dfschema_to_schema_conversion() { let mut a_metadata = HashMap::new(); a_metadata.insert("key".to_string(), "value".to_string()); let a_field = Field::new("a", DataType::Int64, false).with_metadata(a_metadata); diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs index d1c3747b52b4c..452f1862b2746 100644 --- a/datafusion/common/src/functional_dependencies.rs +++ b/datafusion/common/src/functional_dependencies.rs @@ -433,7 +433,7 @@ impl FunctionalDependencies { } /// This function ensures that functional dependencies involving uniquely - /// occuring determinant keys cover their entire table in terms of + /// occurring determinant keys cover their entire table in terms of /// dependent columns. 
pub fn extend_target_indices(&mut self, n_out: usize) { self.deps.iter_mut().for_each( diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index b6ccaa74d5fcf..67f3da4f48deb 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -62,7 +62,7 @@ impl std::fmt::Display for ResolvedTableReference { /// assert_eq!(table_reference, TableReference::bare("mytable")); /// /// // Get a table reference to 'MyTable' (note the capitalization) using double quotes -/// // (programatically it is better to use `TableReference::bare` for this) +/// // (programmatically it is better to use `TableReference::bare` for this) /// let table_reference = TableReference::from(r#""MyTable""#); /// assert_eq!(table_reference, TableReference::bare("MyTable")); /// diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog/information_schema.rs index c953de6d16d38..a79f62e742bef 100644 --- a/datafusion/core/src/catalog/information_schema.rs +++ b/datafusion/core/src/catalog/information_schema.rs @@ -314,7 +314,7 @@ impl InformationSchemaTablesBuilder { table_name: impl AsRef, table_type: TableType, ) { - // Note: append_value is actually infallable. + // Note: append_value is actually infallible. self.catalog_names.append_value(catalog_name.as_ref()); self.schema_names.append_value(schema_name.as_ref()); self.table_names.append_value(table_name.as_ref()); @@ -405,7 +405,7 @@ impl InformationSchemaViewBuilder { table_name: impl AsRef, definition: Option>, ) { - // Note: append_value is actually infallable. + // Note: append_value is actually infallible. self.catalog_names.append_value(catalog_name.as_ref()); self.schema_names.append_value(schema_name.as_ref()); self.table_names.append_value(table_name.as_ref()); diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 9a3aa2454e27e..6bcbd43476821 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -353,7 +353,7 @@ async fn infer_schema_from_file_stream( // Expected format: // - 6 bytes // - 2 bytes - // - 4 bytes, not present below v0.15.0 + // - 4 bytes, not present below v0.15.0 // - 4 bytes // // @@ -365,7 +365,7 @@ async fn infer_schema_from_file_stream( // Files should start with these magic bytes if bytes[0..6] != ARROW_MAGIC { return Err(ArrowError::ParseError( - "Arrow file does not contian correct header".to_string(), + "Arrow file does not contain correct header".to_string(), ))?; } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index baeaf51fb56d1..5daa8447551b1 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -645,7 +645,7 @@ mod tests { let session_ctx = SessionContext::new_with_config(config); let state = session_ctx.state(); let task_ctx = state.task_ctx(); - // skip column 9 that overflows the automaticly discovered column type of i64 (u64 would work) + // skip column 9 that overflows the automatically discovered column type of i64 (u64 would work) let projection = Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]); let exec = get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 
6271d8af37862..d4e77b911c9f1 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -854,14 +854,14 @@ fn spawn_column_parallel_row_group_writer( let mut col_array_channels = Vec::with_capacity(num_columns); for writer in col_writers.into_iter() { // Buffer size of this channel limits the number of arrays queued up for column level serialization - let (send_array, recieve_array) = + let (send_array, receive_array) = mpsc::channel::(max_buffer_size); col_array_channels.push(send_array); let reservation = MemoryConsumer::new("ParquetSink(ArrowColumnWriter)").register(pool); let task = SpawnedTask::spawn(column_serializer_task( - recieve_array, + receive_array, writer, reservation, )); @@ -936,7 +936,7 @@ fn spawn_rg_join_and_finalize_task( /// row group is reached, the parallel tasks are joined on another separate task /// and sent to a concatenation task. This task immediately continues to work /// on the next row group in parallel. So, parquet serialization is parallelized -/// accross both columns and row_groups, with a theoretical max number of parallel tasks +/// across both columns and row_groups, with a theoretical max number of parallel tasks /// given by n_columns * num_row_groups. fn spawn_parquet_parallel_serialization_task( mut data: Receiver, @@ -1560,7 +1560,7 @@ mod tests { // . batch1 written into first file and includes: // - column c1 that has 3 rows with one null. Stats min and max of string column is missing for this test even the column has values // . batch2 written into second file and includes: - // - column c2 that has 3 rows with one null. Stats min and max of int are avaialble and 1 and 2 respectively + // - column c2 that has 3 rows with one null. Stats min and max of int are available and 1 and 2 respectively let store = Arc::new(LocalFileSystem::new()) as _; let (files, _file_names) = store_parquet(vec![batch1, batch2], false).await?; @@ -2112,7 +2112,7 @@ mod tests { let path_parts = path.parts().collect::>(); assert_eq!(path_parts.len(), 1, "should not have path prefix"); - assert_eq!(num_rows, 2, "file metdata to have 2 rows"); + assert_eq!(num_rows, 2, "file metadata to have 2 rows"); assert!( schema.iter().any(|col_schema| col_schema.name == "a"), "output file metadata should contain col a" @@ -2208,7 +2208,7 @@ mod tests { ); expected_partitions.remove(prefix); - assert_eq!(num_rows, 1, "file metdata to have 1 row"); + assert_eq!(num_rows, 1, "file metadata to have 1 row"); assert!( !schema.iter().any(|col_schema| col_schema.name == "a"), "output file metadata will not contain partitioned col a" diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index e29c877442cf4..a58c77e313137 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -54,7 +54,7 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; /// which should be contained within the same output file. The outer channel /// is used to send a dynamic number of inner channels, representing a dynamic /// number of total output files. The caller is also responsible to monitor -/// the demux task for errors and abort accordingly. The single_file_ouput parameter +/// the demux task for errors and abort accordingly. The single_file_output parameter /// overrides all other settings to force only a single file to be written. 
/// partition_by parameter will additionally split the input based on the unique /// values of a specific column ``` diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 8bd0dae9f5a48..f788865b070f7 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -141,7 +141,7 @@ pub(crate) async fn stateless_serialize_and_write_files( // tracks the specific error triggering abort let mut triggering_error = None; // tracks if any errors were encountered in the process of aborting writers. - // if true, we may not have a guarentee that all written data was cleaned up. + // if true, we may not have a guarantee that all written data was cleaned up. let mut any_abort_errors = false; let mut join_set = JoinSet::new(); while let Some((data_rx, serializer, writer)) = rx.recv().await { @@ -188,7 +188,7 @@ pub(crate) async fn stateless_serialize_and_write_files( true => return internal_err!("Error encountered during writing to ObjectStore and failed to abort all writers. Partial result may have been written."), false => match triggering_error { Some(e) => return Err(e), - None => return internal_err!("Unknown Error encountered during writing to ObjectStore. All writers succesfully aborted.") + None => return internal_err!("Unknown Error encountered during writing to ObjectStore. All writers successfully aborted.") } } } @@ -268,7 +268,7 @@ pub(crate) async fn stateless_multipart_put( r2?; let total_count = rx_row_cnt.await.map_err(|_| { - internal_datafusion_err!("Did not receieve row count from write coordinater") + internal_datafusion_err!("Did not receive row count from write coordinator") })?; Ok(total_count) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index c1ce4cc5b6c5a..bfc33ce0bd73f 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -759,7 +759,7 @@ mod tests { .otherwise(lit(false)) .expect("valid case expr")) )); - // static expression not relvant in this context but we + // static expression not relevant in this context but we // test it as an edge case anyway in case we want to generalize // this helper function assert!(expr_applicable_for_cols(&[], &lit(true))); diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index ea4d396a14cb3..1a7390d46f898 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -695,8 +695,8 @@ impl ListingTable { } /// Specify the SQL definition for this table, if any - pub fn with_definition(mut self, defintion: Option) -> Self { - self.definition = defintion; + pub fn with_definition(mut self, definition: Option) -> Self { + self.definition = definition; self } diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 73fffd8abaeda..7566df628ed71 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -53,7 +53,7 @@ impl ListingTableUrl { /// subdirectories. 
/// /// Similarly `s3://BUCKET/blob.csv` refers to `blob.csv` in the S3 bucket `BUCKET`, - /// wherease `s3://BUCKET/foo/` refers to all objects with the prefix `foo/` in the + /// whereas `s3://BUCKET/foo/` refers to all objects with the prefix `foo/` in the /// S3 bucket `BUCKET` /// /// # URL Encoding diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index aab42285a0b2f..5c49282095598 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -644,7 +644,7 @@ mod tests { Ok(partitions) } - /// Returns the value of results. For example, returns 6 given the follwing + /// Returns the value of results. For example, returns 6 given the following /// /// ```text /// +-------+, diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index f5d3c7a6410d7..17850ea7585aa 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -56,7 +56,7 @@ pub fn wrap_partition_type_in_dict(val_type: DataType) -> DataType { } /// Convert a [`ScalarValue`] of partition columns to a type, as -/// decribed in the documentation of [`wrap_partition_type_in_dict`], +/// described in the documentation of [`wrap_partition_type_in_dict`], /// which can wrap the types. pub fn wrap_partition_value_in_dict(val: ScalarValue) -> ScalarValue { ScalarValue::Dictionary(Box::new(DataType::UInt16), Box::new(val)) @@ -682,7 +682,7 @@ mod tests { vec![table_partition_col.clone()], ); - // verify the proj_schema inlcudes the last column and exactly the same the field it is defined + // verify the proj_schema includes the last column and exactly the same the field it is defined let (proj_schema, _proj_statistics, _) = conf.project(); assert_eq!(proj_schema.fields().len(), file_schema.fields().len() + 1); assert_eq!( diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 9bc79805746f2..170beb15ead25 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -1123,7 +1123,7 @@ mod tests { } #[tokio::test] - async fn test_row_group_bloom_filter_pruning_predicate_mutiple_expr() { + async fn test_row_group_bloom_filter_pruning_predicate_multiple_expr() { BloomFilterTest::new_data_index_bloom_encoding_stats() .with_expect_all_pruned() // generate pruning predicate `(String = "Hello_Not_exists" OR String = "Hello_Not_exists2")` diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 59369aba57a97..44e22f778075d 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -2532,10 +2532,10 @@ mod test { fn timestamp_seconds_array( input: impl IntoIterator>, - timzezone: Option<&str>, + timezone: Option<&str>, ) -> ArrayRef { let array: TimestampSecondArray = input.into_iter().collect(); - match timzezone { + match timezone { Some(tz) => Arc::new(array.with_timezone(tz)), None => Arc::new(array), } @@ -2543,10 +2543,10 @@ mod test { fn timestamp_milliseconds_array( input: impl IntoIterator>, - timzezone: Option<&str>, + timezone: Option<&str>, ) -> ArrayRef { let 
array: TimestampMillisecondArray = input.into_iter().collect(); - match timzezone { + match timezone { Some(tz) => Arc::new(array.with_timezone(tz)), None => Arc::new(array), } @@ -2554,10 +2554,10 @@ mod test { fn timestamp_microseconds_array( input: impl IntoIterator>, - timzezone: Option<&str>, + timezone: Option<&str>, ) -> ArrayRef { let array: TimestampMicrosecondArray = input.into_iter().collect(); - match timzezone { + match timezone { Some(tz) => Arc::new(array.with_timezone(tz)), None => Arc::new(array), } @@ -2565,10 +2565,10 @@ mod test { fn timestamp_nanoseconds_array( input: impl IntoIterator>, - timzezone: Option<&str>, + timezone: Option<&str>, ) -> ArrayRef { let array: TimestampNanosecondArray = input.into_iter().collect(); - match timzezone { + match timezone { Some(tz) => Arc::new(array.with_timezone(tz)), None => Arc::new(array), } diff --git a/datafusion/core/src/datasource/schema_adapter.rs b/datafusion/core/src/datasource/schema_adapter.rs index e8b64e90900c7..715e2da5d9781 100644 --- a/datafusion/core/src/datasource/schema_adapter.rs +++ b/datafusion/core/src/datasource/schema_adapter.rs @@ -251,7 +251,7 @@ mod tests { #[tokio::test] async fn can_override_schema_adapter() { - // Test shows that SchemaAdapter can add a column that doesn't existin in the + // Test shows that SchemaAdapter can add a column that doesn't existing in the // record batches returned from parquet. This can be useful for schema evolution // where older files may not have all columns. let tmp_dir = TempDir::new().unwrap(); diff --git a/datafusion/core/src/datasource/streaming.rs b/datafusion/core/src/datasource/streaming.rs index 0ba6f85ec3e2b..205faee43334f 100644 --- a/datafusion/core/src/datasource/streaming.rs +++ b/datafusion/core/src/datasource/streaming.rs @@ -50,7 +50,7 @@ impl StreamingTable { if !schema.contains(partition_schema) { debug!( "target schema does not contain partition schema. \ - Target_schema: {schema:?}. Partiton Schema: {partition_schema:?}" + Target_schema: {schema:?}. Partition Schema: {partition_schema:?}" ); return plan_err!("Mismatch between schema and batches"); } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 640a9b14a65f1..ac48788edb197 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -500,7 +500,7 @@ impl SessionContext { self.execute_logical_plan(plan).await } - /// Creates logical expresssions from SQL query text. + /// Creates logical expressions from SQL query text. /// /// # Example: Parsing SQL queries /// @@ -1352,7 +1352,7 @@ impl SessionContext { self.state.write().register_catalog_list(catalog_list) } - /// Registers a [`ConfigExtension`] as a table option extention that can be + /// Registers a [`ConfigExtension`] as a table option extension that can be /// referenced from SQL statements executed against this context. pub fn register_table_options_extension(&self, extension: T) { self.state diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 03ce8d3b5892a..bc5062893c867 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -1019,7 +1019,7 @@ impl SessionStateBuilder { self } - /// Set tje [`PhysicalOptimizerRule`]s used to optimize plans. + /// Set the [`PhysicalOptimizerRule`]s used to optimize plans. 
pub fn with_physical_optimizer_rules( mut self, physical_optimizers: Vec>, diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 940b09131a771..56cdbd645285d 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -354,7 +354,7 @@ mod tests { PhysicalGroupBy::default(), aggr_expr, ); - // should combine the Partial/Final AggregateExecs to tne Single AggregateExec + // should combine the Partial/Final AggregateExecs to the Single AggregateExec let expected = &[ "AggregateExec: mode=Single, gby=[], aggr=[COUNT(1)]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c]", @@ -394,7 +394,7 @@ mod tests { let final_group_by = PhysicalGroupBy::new_single(groups); let plan = final_aggregate_exec(partial_agg, final_group_by, aggr_expr); - // should combine the Partial/Final AggregateExecs to tne Single AggregateExec + // should combine the Partial/Final AggregateExecs to the Single AggregateExec let expected = &[ "AggregateExec: mode=Single, gby=[c@2 as c], aggr=[Sum(b)]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c]", diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index f7c2aee578ba2..818b2304fe097 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -392,7 +392,7 @@ fn adjust_input_keys_ordering( let expr = proj.expr(); // For Projection, we need to transform the requirements to the columns before the Projection // And then to push down the requirements - // Construct a mapping from new name to the orginal Column + // Construct a mapping from new name to the original Column let new_required = map_columns_before_projection(&requirements.data, expr); if new_required.len() == requirements.data.len() { requirements.children[0].data = new_required; @@ -566,7 +566,7 @@ fn shift_right_required( }) .collect::>(); - // if the parent required are all comming from the right side, the requirements can be pushdown + // if the parent required are all coming from the right side, the requirements can be pushdown (new_right_required.len() == parent_required.len()).then_some(new_right_required) } diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 1613e5089860c..6dfe17632a653 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -719,7 +719,7 @@ mod tests_statistical { use rstest::rstest; - /// Return statistcs for empty table + /// Return statistics for empty table fn empty_statistics() -> Statistics { Statistics { num_rows: Precision::Absent, @@ -737,7 +737,7 @@ mod tests_statistical { ) } - /// Return statistcs for small table + /// Return statistics for small table fn small_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); Statistics { @@ -747,7 +747,7 @@ mod tests_statistical { } } - /// Return statistcs for big table + /// Return statistics for big table fn big_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); Statistics { @@ -757,7 +757,7 @@ mod tests_statistical { } } - /// Return statistcs for big table + /// Return statistics for big 
table fn bigger_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); Statistics { diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 3c2be59f75040..24d9f31687f92 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -18,7 +18,7 @@ //! This file implements the `ProjectionPushdown` physical optimization rule. //! The function [`remove_unnecessary_projections`] tries to push down all //! projections one by one if the operator below is amenable to this. If a -//! projection reaches a source, it can even dissappear from the plan entirely. +//! projection reaches a source, it can even disappear from the plan entirely. use std::collections::HashMap; use std::sync::Arc; diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index a7ce29bdc7e3b..a1ace229985e7 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -623,7 +623,7 @@ impl PruningPredicate { /// /// This is useful to avoid fetching statistics for columns that will not be /// used in the predicate. For example, it can be used to avoid reading - /// uneeded bloom filters (a non trivial operation). + /// unneeded bloom filters (a non trivial operation). pub fn literal_columns(&self) -> Vec { let mut seen = HashSet::new(); self.literal_guarantees diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index 1ad4179cefd8a..d6b4c33384856 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -128,7 +128,7 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { /// # Returns /// /// Returns `Ok(converted_aggr_exprs)` if the conversion process completes -/// successfully. Any errors occuring during the conversion process are +/// successfully. Any errors occurring during the conversion process are /// passed through. 
fn try_convert_aggregate_if_better( aggr_exprs: Vec>, diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 1b2a6770cf013..9f7bd5227e346 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -99,7 +99,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { // In the same SessionContext, AliasGenerator will increase subquery_alias id by 1 // https://github.com/apache/datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43 - // for compare difference betwwen sql and df logical plan, we need to create a new SessionContext here + // for compare difference between sql and df logical plan, we need to create a new SessionContext here let ctx = create_join_context()?; let df_results = ctx .table("t1") diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 1d151f9fd3683..f4f4f8cd89cb1 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -259,7 +259,7 @@ async fn sort_spill_reservation() { .with_query("select * from t ORDER BY a , b DESC") // enough memory to sort if we don't try to merge it all at once .with_memory_limit(partition_size) - // use a single partiton so only a sort is needed + // use a single partition so only a sort is needed .with_scenario(scenario) .with_disk_manager_config(DiskManagerConfig::NewOs) .with_expected_plan( @@ -361,7 +361,7 @@ struct TestCase { /// How should the disk manager (that allows spilling) be /// configured? Defaults to `Disabled` disk_manager_config: DiskManagerConfig, - /// Expected explain plan, if non emptry + /// Expected explain plan, if non-empty expected_plan: Vec, /// Is the plan expected to pass? Defaults to false expected_success: bool, diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 2b4ba0b171330..623f321ce152e 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -579,7 +579,7 @@ async fn test_data_page_stats_with_all_null_page() { /////////////// MORE GENERAL TESTS ////////////////////// // . Many columns in a file -// . Differnet data types +// . Different data types // . Different row group sizes // Four different integer types @@ -1733,7 +1733,7 @@ async fn test_float16() { #[tokio::test] async fn test_decimal() { - // This creates a parquet file of 1 column "decimal_col" with decimal data type and precicion 9, scale 2 + // This creates a parquet file of 1 column "decimal_col" with decimal data type and precision 9, scale 2 // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups let reader = TestReader { scenario: Scenario::Decimal, @@ -1763,7 +1763,7 @@ async fn test_decimal() { } #[tokio::test] async fn test_decimal_256() { - // This creates a parquet file of 1 column "decimal256_col" with decimal data type and precicion 9, scale 2 + // This creates a parquet file of 1 column "decimal256_col" with decimal data type and precision 9, scale 2 // file has 3 record batches, each has 5 rows. 
They will be saved into 3 row groups let reader = TestReader { scenario: Scenario::Decimal256, diff --git a/datafusion/core/tests/parquet/schema.rs b/datafusion/core/tests/parquet/schema.rs index 1b572914d7bda..e13fbad24426f 100644 --- a/datafusion/core/tests/parquet/schema.rs +++ b/datafusion/core/tests/parquet/schema.rs @@ -25,7 +25,7 @@ use datafusion_common::assert_batches_sorted_eq; #[tokio::test] async fn schema_merge_ignores_metadata_by_default() { - // Create several parquet files in same directoty / table with + // Create several parquet files in same directory / table with // same schema but different metadata let tmp_dir = TempDir::new().unwrap(); let table_dir = tmp_dir.path().join("parquet_test"); @@ -103,7 +103,7 @@ async fn schema_merge_ignores_metadata_by_default() { #[tokio::test] async fn schema_merge_can_preserve_metadata() { - // Create several parquet files in same directoty / table with + // Create several parquet files in same directory / table with // same schema but different metadata let tmp_dir = TempDir::new().unwrap(); let table_dir = tmp_dir.path().join("parquet_test"); diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 502590f9e2e22..fe4777b04396c 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -352,7 +352,7 @@ async fn csv_explain_verbose() { // flatten to a single string let actual = actual.into_iter().map(|r| r.join("\t")).collect::(); - // Don't actually test the contents of the debuging output (as + // Don't actually test the contents of the debugging output (as // that may change and keeping this test updated will be a // pain). Instead just check for a few key pieces. assert_contains!(&actual, "logical_plan"); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 5847952ae6a61..219f6c26cf8f7 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -941,11 +941,11 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> { /// Saves whatever is passed to it as a scalar function #[derive(Debug, Default)] -struct RecordingFunctonFactory { +struct RecordingFunctionFactory { calls: Mutex>, } -impl RecordingFunctonFactory { +impl RecordingFunctionFactory { fn new() -> Self { Self::default() } @@ -957,7 +957,7 @@ impl RecordingFunctonFactory { } #[async_trait::async_trait] -impl FunctionFactory for RecordingFunctonFactory { +impl FunctionFactory for RecordingFunctionFactory { async fn create( &self, _state: &SessionState, @@ -972,7 +972,7 @@ impl FunctionFactory for RecordingFunctonFactory { #[tokio::test] async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<()> { - let function_factory = Arc::new(RecordingFunctonFactory::new()); + let function_factory = Arc::new(RecordingFunctionFactory::new()); let ctx = SessionContext::new().with_function_factory(function_factory.clone()); let sql = r#" From adcfd85b7732c029e4d561db5a90e6f518cf763f Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Wed, 17 Jul 2024 21:13:27 +0200 Subject: [PATCH 077/357] Move spill related functions to spill.rs (#11509) --- .../physical-plan/src/aggregates/row_hash.rs | 3 +- datafusion/physical-plan/src/lib.rs | 116 ++++-------------- datafusion/physical-plan/src/sorts/sort.rs | 8 +- datafusion/physical-plan/src/spill.rs | 87 
+++++++++++++ 4 files changed, 120 insertions(+), 94 deletions(-) create mode 100644 datafusion/physical-plan/src/spill.rs diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index a1d3378181c28..167ca72407503 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -31,8 +31,9 @@ use crate::common::IPCWriter; use crate::metrics::{BaselineMetrics, RecordOutput}; use crate::sorts::sort::sort_batch; use crate::sorts::streaming_merge; +use crate::spill::read_spill_as_stream; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, read_spill_as_stream, ExecutionPlan, PhysicalExpr}; +use crate::{aggregates, ExecutionPlan, PhysicalExpr}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index dc736993a4533..046977da0a37e 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -21,31 +21,41 @@ use std::any::Any; use std::fmt::Debug; -use std::fs::File; -use std::io::BufReader; -use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::coalesce_partitions::CoalescePartitionsExec; -use crate::display::DisplayableExecutionPlan; -use crate::metrics::MetricsSet; -use crate::repartition::RepartitionExec; -use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; - use arrow::datatypes::SchemaRef; -use arrow::ipc::reader::FileReader; use arrow::record_batch::RecordBatch; +use futures::stream::{StreamExt, TryStreamExt}; +use tokio::task::JoinSet; + use datafusion_common::config::ConfigOptions; -use datafusion_common::{exec_datafusion_err, exec_err, Result}; +pub use datafusion_common::hash_utils; +pub use datafusion_common::utils::project_schema; +use datafusion_common::{exec_err, Result}; +pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; use datafusion_execution::TaskContext; +pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; +pub use datafusion_expr::{Accumulator, ColumnarValue}; +pub use datafusion_physical_expr::window::WindowExpr; +pub use datafusion_physical_expr::{ + expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, +}; use datafusion_physical_expr::{ EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, }; -use futures::stream::{StreamExt, TryStreamExt}; -use log::debug; -use tokio::sync::mpsc::Sender; -use tokio::task::JoinSet; +use crate::coalesce_partitions::CoalescePartitionsExec; +use crate::display::DisplayableExecutionPlan; +pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; +pub use crate::metrics::Metric; +use crate::metrics::MetricsSet; +pub use crate::ordering::InputOrderMode; +use crate::repartition::RepartitionExec; +use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; +pub use crate::stream::EmptyRecordBatchStream; +use crate::stream::RecordBatchStreamAdapter; +pub use crate::topk::TopK; +pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor}; mod ordering; mod topk; @@ -70,6 +80,7 @@ pub mod projection; pub mod recursive_query; pub mod repartition; pub mod sorts; +pub mod spill; pub mod stream; pub mod streaming; pub mod tree_node; @@ -79,29 +90,6 @@ pub mod values; pub mod windows; pub mod work_table; -pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, 
VerboseDisplay}; -pub use crate::metrics::Metric; -pub use crate::ordering::InputOrderMode; -pub use crate::topk::TopK; -pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor}; - -pub use datafusion_common::hash_utils; -pub use datafusion_common::utils::project_schema; -pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; -pub use datafusion_expr::{Accumulator, ColumnarValue}; -pub use datafusion_physical_expr::window::WindowExpr; -pub use datafusion_physical_expr::{ - expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, -}; - -// Backwards compatibility -use crate::common::IPCWriter; -pub use crate::stream::EmptyRecordBatchStream; -use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; -use datafusion_execution::disk_manager::RefCountedTempFile; -use datafusion_execution::memory_pool::human_readable_size; -pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; - pub mod udaf { pub use datafusion_physical_expr_common::aggregate::{ create_aggregate_expr, AggregateFunctionExpr, @@ -903,63 +891,13 @@ pub fn get_plan_string(plan: &Arc) -> Vec { actual.iter().map(|elem| elem.to_string()).collect() } -/// Read spilled batches from the disk -/// -/// `path` - temp file -/// `schema` - batches schema, should be the same across batches -/// `buffer` - internal buffer of capacity batches -pub fn read_spill_as_stream( - path: RefCountedTempFile, - schema: SchemaRef, - buffer: usize, -) -> Result { - let mut builder = RecordBatchReceiverStream::builder(schema, buffer); - let sender = builder.tx(); - - builder.spawn_blocking(move || read_spill(sender, path.path())); - - Ok(builder.build()) -} - -/// Spills in-memory `batches` to disk. -/// -/// Returns total number of the rows spilled to disk. 
-pub fn spill_record_batches( - batches: Vec, - path: PathBuf, - schema: SchemaRef, -) -> Result { - let mut writer = IPCWriter::new(path.as_ref(), schema.as_ref())?; - for batch in batches { - writer.write(&batch)?; - } - writer.finish()?; - debug!( - "Spilled {} batches of total {} rows to disk, memory released {}", - writer.num_batches, - writer.num_rows, - human_readable_size(writer.num_bytes), - ); - Ok(writer.num_rows) -} - -fn read_spill(sender: Sender>, path: &Path) -> Result<()> { - let file = BufReader::new(File::open(path)?); - let reader = FileReader::try_new(file, None)?; - for batch in reader { - sender - .blocking_send(batch.map_err(Into::into)) - .map_err(|e| exec_datafusion_err!("{e}"))?; - } - Ok(()) -} - #[cfg(test)] mod tests { use std::any::Any; use std::sync::Arc; use arrow_schema::{Schema, SchemaRef}; + use datafusion_common::{Result, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index f347a0f5b6d56..5b99f8bc71617 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -30,13 +30,13 @@ use crate::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, }; use crate::sorts::streaming_merge::streaming_merge; +use crate::spill::{read_spill_as_stream, spill_record_batches}; use crate::stream::RecordBatchStreamAdapter; use crate::topk::TopK; use crate::{ - read_spill_as_stream, spill_record_batches, DisplayAs, DisplayFormatType, - Distribution, EmptyRecordBatchStream, ExecutionMode, ExecutionPlan, - ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, - Statistics, + DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionMode, + ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, }; use arrow::compute::{concat_batches, lexsort_to_indices, take, SortColumn}; diff --git a/datafusion/physical-plan/src/spill.rs b/datafusion/physical-plan/src/spill.rs new file mode 100644 index 0000000000000..0018a27bd22bb --- /dev/null +++ b/datafusion/physical-plan/src/spill.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Defines the spilling functions + +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; + +use arrow::datatypes::SchemaRef; +use arrow::ipc::reader::FileReader; +use arrow::record_batch::RecordBatch; +use log::debug; +use tokio::sync::mpsc::Sender; + +use datafusion_common::{exec_datafusion_err, Result}; +use datafusion_execution::disk_manager::RefCountedTempFile; +use datafusion_execution::memory_pool::human_readable_size; +use datafusion_execution::SendableRecordBatchStream; + +use crate::common::IPCWriter; +use crate::stream::RecordBatchReceiverStream; + +/// Read spilled batches from the disk +/// +/// `path` - temp file +/// `schema` - batches schema, should be the same across batches +/// `buffer` - internal buffer of capacity batches +pub fn read_spill_as_stream( + path: RefCountedTempFile, + schema: SchemaRef, + buffer: usize, +) -> Result { + let mut builder = RecordBatchReceiverStream::builder(schema, buffer); + let sender = builder.tx(); + + builder.spawn_blocking(move || read_spill(sender, path.path())); + + Ok(builder.build()) +} + +/// Spills in-memory `batches` to disk. +/// +/// Returns total number of the rows spilled to disk. +pub fn spill_record_batches( + batches: Vec, + path: PathBuf, + schema: SchemaRef, +) -> Result { + let mut writer = IPCWriter::new(path.as_ref(), schema.as_ref())?; + for batch in batches { + writer.write(&batch)?; + } + writer.finish()?; + debug!( + "Spilled {} batches of total {} rows to disk, memory released {}", + writer.num_batches, + writer.num_rows, + human_readable_size(writer.num_bytes), + ); + Ok(writer.num_rows) +} + +fn read_spill(sender: Sender>, path: &Path) -> Result<()> { + let file = BufReader::new(File::open(path)?); + let reader = FileReader::try_new(file, None)?; + for batch in reader { + sender + .blocking_send(batch.map_err(Into::into)) + .map_err(|e| exec_datafusion_err!("{e}"))?; + } + Ok(()) +} From 4b840c08830801f7ceafb7d4a072c6ffd565a8ca Mon Sep 17 00:00:00 2001 From: wiedld Date: Wed, 17 Jul 2024 14:05:32 -0700 Subject: [PATCH 078/357] Add tests that show the different defaults for `ArrowWriter` and `TableParquetOptions` (#11524) * test(11367): define current behavior of parquet writer configuration defaults * chore(11367): update code comments to make it more explicit on the mismatches --- .../common/src/file_options/parquet_writer.rs | 295 +++++++++++++++++- 1 file changed, 294 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index a14cbdecf6011..dd4bb8ce505ea 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -369,7 +369,13 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result => becomes parquet's true + // TODO: should this be changed? + // refer to https://github.com/apache/datafusion/issues/11367 + assert!( + default_writer_props.dictionary_enabled(&"default".into()), + "extern parquet's default is true" + ); + assert_eq!( + default_table_writer_opts.global.dictionary_enabled, None, + "datafusion's has no default" + ); + assert!( + from_datafusion_defaults.dictionary_enabled(&"default".into()), + "should see the extern parquet's default over-riding datafusion's None", + ); + + // datafusion's `None` for Option => becomes parquet's EnabledStatistics::Page + // TODO: should this be changed? 
+ // refer to https://github.com/apache/datafusion/issues/11367 + assert_eq!( + default_writer_props.statistics_enabled(&"default".into()), + EnabledStatistics::Page, + "extern parquet's default is page" + ); + assert_eq!( + default_table_writer_opts.global.statistics_enabled, None, + "datafusion's has no default" + ); + assert_eq!( + from_datafusion_defaults.statistics_enabled(&"default".into()), + EnabledStatistics::Page, + "should see the extern parquet's default over-riding datafusion's None", + ); + + // datafusion's `None` for Option => becomes parquet's 4096 + // TODO: should this be changed? + // refer to https://github.com/apache/datafusion/issues/11367 + assert_eq!( + default_writer_props.max_statistics_size(&"default".into()), + 4096, + "extern parquet's default is 4096" + ); + assert_eq!( + default_table_writer_opts.global.max_statistics_size, None, + "datafusion's has no default" + ); + assert_eq!( + default_writer_props.max_statistics_size(&"default".into()), + 4096, + "should see the extern parquet's default over-riding datafusion's None", + ); + + // Confirm all other settings are equal. + // First resolve the known discrepancies, (set as the same). + // TODO: once we fix the above mis-matches, we should be able to remove this. + let mut from_extern_parquet = + session_config_from_writer_props(&default_writer_props); + from_extern_parquet.global.compression = Some("zstd(3)".into()); + from_extern_parquet.global.data_page_row_count_limit = usize::MAX; + from_extern_parquet.global.column_index_truncate_length = None; + from_extern_parquet.global.dictionary_enabled = None; + from_extern_parquet.global.statistics_enabled = None; + from_extern_parquet.global.max_statistics_size = None; + + // Expected: the remaining should match + let same_created_by = default_table_writer_opts.global.created_by.clone(); // we expect these to be different + from_extern_parquet.global.created_by = same_created_by; // we expect these to be different + assert_eq!( + default_table_writer_opts, + from_extern_parquet, + "the default writer_props should have the same configuration as the session's default TableParquetOptions", + ); + } + + #[test] + fn test_bloom_filter_defaults() { + // the TableParquetOptions::default, with only the bloom filter turned on + let mut default_table_writer_opts = TableParquetOptions::default(); + default_table_writer_opts.global.bloom_filter_on_write = true; + + // the WriterProperties::default, with only the bloom filter turned on + let default_writer_props = WriterProperties::new(); + let from_datafusion_defaults = + WriterPropertiesBuilder::try_from(&default_table_writer_opts) + .unwrap() + .set_bloom_filter_enabled(true) + .build(); + + // TODO: should have same behavior in either. 
+ // refer to https://github.com/apache/datafusion/issues/11367 + assert_ne!( + default_writer_props.bloom_filter_properties(&"default".into()), + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + "parquet and datafusion props, will not have the same bloom filter props", + ); + assert_eq!( + default_writer_props.bloom_filter_properties(&"default".into()), + None, + "extern parquet's default remains None" + ); + assert_eq!( + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + Some(&BloomFilterProperties::default()), + "datafusion's has BloomFilterProperties::default", + ); + } + + #[test] + fn test_bloom_filter_set_fpp_only() { + // the TableParquetOptions::default, with only fpp set + let mut default_table_writer_opts = TableParquetOptions::default(); + default_table_writer_opts.global.bloom_filter_on_write = true; + default_table_writer_opts.global.bloom_filter_fpp = Some(0.42); + + // the WriterProperties::default, with only fpp set + let default_writer_props = WriterProperties::new(); + let from_datafusion_defaults = + WriterPropertiesBuilder::try_from(&default_table_writer_opts) + .unwrap() + .set_bloom_filter_enabled(true) + .set_bloom_filter_fpp(0.42) + .build(); + + // TODO: should have same behavior in either. + // refer to https://github.com/apache/datafusion/issues/11367 + assert_ne!( + default_writer_props.bloom_filter_properties(&"default".into()), + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + "parquet and datafusion props, will not have the same bloom filter props", + ); + assert_eq!( + default_writer_props.bloom_filter_properties(&"default".into()), + None, + "extern parquet's default remains None" + ); + assert_eq!( + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + Some(&BloomFilterProperties { + fpp: 0.42, + ndv: DEFAULT_BLOOM_FILTER_NDV + }), + "datafusion's has BloomFilterProperties", + ); + } + + #[test] + fn test_bloom_filter_set_ndv_only() { + // the TableParquetOptions::default, with only ndv set + let mut default_table_writer_opts = TableParquetOptions::default(); + default_table_writer_opts.global.bloom_filter_on_write = true; + default_table_writer_opts.global.bloom_filter_ndv = Some(42); + + // the WriterProperties::default, with only ndv set + let default_writer_props = WriterProperties::new(); + let from_datafusion_defaults = + WriterPropertiesBuilder::try_from(&default_table_writer_opts) + .unwrap() + .set_bloom_filter_enabled(true) + .set_bloom_filter_ndv(42) + .build(); + + // TODO: should have same behavior in either. 
+ // refer to https://github.com/apache/datafusion/issues/11367 + assert_ne!( + default_writer_props.bloom_filter_properties(&"default".into()), + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + "parquet and datafusion props, will not have the same bloom filter props", + ); + assert_eq!( + default_writer_props.bloom_filter_properties(&"default".into()), + None, + "extern parquet's default remains None" + ); + assert_eq!( + from_datafusion_defaults.bloom_filter_properties(&"default".into()), + Some(&BloomFilterProperties { + fpp: DEFAULT_BLOOM_FILTER_FPP, + ndv: 42 + }), + "datafusion's has BloomFilterProperties", + ); + } } From 58da9690f208261547e358728cc89a4cfc6e7a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 18 Jul 2024 05:13:28 +0800 Subject: [PATCH 079/357] Create `datafusion-physical-optimizer` crate (#11507) * Create datafusion-physical-optimizer crate * fmt .md * Update Cargo.lock in datafusion-cli * fmt toml and fix doc --- Cargo.toml | 2 + datafusion-cli/Cargo.lock | 92 ++++++++++--------- datafusion/core/Cargo.toml | 1 + .../core/src/execution/session_state.rs | 2 +- datafusion/core/src/lib.rs | 2 +- .../aggregate_statistics.rs | 2 +- .../physical_optimizer/coalesce_batches.rs | 2 +- .../combine_partial_final_agg.rs | 2 +- .../enforce_distribution.rs | 2 +- .../src/physical_optimizer/enforce_sorting.rs | 3 +- .../src/physical_optimizer/join_selection.rs | 2 +- .../limited_distinct_aggregation.rs | 2 +- datafusion/core/src/physical_optimizer/mod.rs | 2 +- .../core/src/physical_optimizer/optimizer.rs | 28 +----- .../physical_optimizer/output_requirements.rs | 2 +- .../physical_optimizer/projection_pushdown.rs | 2 +- .../src/physical_optimizer/sanity_checker.rs | 2 +- .../physical_optimizer/topk_aggregation.rs | 2 +- .../physical_optimizer/update_aggr_exprs.rs | 3 +- datafusion/core/src/physical_planner.rs | 2 +- datafusion/physical-optimizer/Cargo.toml | 36 ++++++++ datafusion/physical-optimizer/README.md | 25 +++++ datafusion/physical-optimizer/src/lib.rs | 22 +++++ .../physical-optimizer/src/optimizer.rs | 48 ++++++++++ 24 files changed, 202 insertions(+), 86 deletions(-) create mode 100644 datafusion/physical-optimizer/Cargo.toml create mode 100644 datafusion/physical-optimizer/README.md create mode 100644 datafusion/physical-optimizer/src/lib.rs create mode 100644 datafusion/physical-optimizer/src/optimizer.rs diff --git a/Cargo.toml b/Cargo.toml index f61ed7e58fe37..24bde78b3001b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "datafusion/optimizer", "datafusion/physical-expr-common", "datafusion/physical-expr", + "datafusion/physical-optimizer", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", @@ -97,6 +98,7 @@ datafusion-functions-array = { path = "datafusion/functions-array", version = "4 datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "40.0.0" } datafusion-physical-plan = { path = "datafusion/physical-plan", version = "40.0.0" } datafusion-proto = { path = "datafusion/proto", version = "40.0.0" } datafusion-proto-common = { path = "datafusion/proto-common", version = "40.0.0" } diff --git 
a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e48c6b081e1a5..cdf0e7f573163 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -387,7 +387,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -772,9 +772,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" dependencies = [ "arrayref", "arrayvec", @@ -838,9 +838,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" [[package]] name = "bytes-utils" @@ -875,13 +875,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.0" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" +checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" dependencies = [ "jobserver", "libc", - "once_cell", ] [[package]] @@ -1105,7 +1104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -1154,6 +1153,7 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", + "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", "flate2", @@ -1391,6 +1391,14 @@ dependencies = [ "rand", ] +[[package]] +name = "datafusion-physical-optimizer" +version = "40.0.0" +dependencies = [ + "datafusion-common", + "datafusion-physical-plan", +] + [[package]] name = "datafusion-physical-plan" version = "40.0.0" @@ -1694,7 +1702,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -1908,9 +1916,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http 1.1.0", @@ -1925,7 +1933,7 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -1982,7 +1990,7 @@ dependencies = [ "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -2034,7 +2042,7 @@ dependencies = [ "futures-channel", "futures-util", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "hyper 1.4.1", "pin-project-lite", "socket2", @@ -2707,7 +2715,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -2917,9 +2925,9 @@ dependencies = [ 
[[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags 2.6.0", ] @@ -2982,7 +2990,7 @@ dependencies = [ "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "http-body-util", "hyper 1.4.1", "hyper-rustls 0.27.2", @@ -3269,9 +3277,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -3282,9 +3290,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" dependencies = [ "core-foundation-sys", "libc", @@ -3319,7 +3327,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3454,7 +3462,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3500,7 +3508,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3513,7 +3521,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3535,9 +3543,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.70" +version = "2.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" dependencies = [ "proc-macro2", "quote", @@ -3585,22 +3593,22 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3670,9 +3678,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = 
"eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" dependencies = [ "backtrace", "bytes", @@ -3695,7 +3703,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3792,7 +3800,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3837,7 +3845,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] @@ -3991,7 +3999,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", "wasm-bindgen-shared", ] @@ -4025,7 +4033,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4290,7 +4298,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.71", ] [[package]] diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 532ca8fde9e73..c937a6f6e59a9 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -106,6 +106,7 @@ datafusion-functions-array = { workspace = true, optional = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } +datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index bc5062893c867..0824b249b7d15 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -39,7 +39,6 @@ use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryP #[cfg(feature = "array_expressions")] use crate::functions_array; use crate::physical_optimizer::optimizer::PhysicalOptimizer; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; use crate::{functions, functions_aggregate}; use arrow_schema::{DataType, SchemaRef}; @@ -74,6 +73,7 @@ use datafusion_optimizer::{ }; use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::ExecutionPlan; use datafusion_sql::parser::{DFParser, Statement}; use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 81c1c4629a3ad..9b9b1db8ff817 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -199,7 +199,7 @@ //! [`QueryPlanner`]: execution::context::QueryPlanner //! [`OptimizerRule`]: datafusion_optimizer::optimizer::OptimizerRule //! [`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule -//! [`PhysicalOptimizerRule`]: crate::physical_optimizer::optimizer::PhysicalOptimizerRule +//! [`PhysicalOptimizerRule`]: crate::physical_optimizer::PhysicalOptimizerRule //! //! ## Query Planning and Execution Overview //! 
diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 66067d8cb5c42..e412d814239d1 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -18,7 +18,6 @@ //! Utilizing exact statistics from sources to avoid scanning data use std::sync::Arc; -use super::optimizer::PhysicalOptimizerRule; use crate::config::ConfigOptions; use crate::error::Result; use crate::physical_plan::aggregates::AggregateExec; @@ -29,6 +28,7 @@ use crate::scalar::ScalarValue; use datafusion_common::stats::Precision; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_expr::utils::COUNT_STAR_EXPANSION; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::udaf::AggregateFunctionExpr; diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/core/src/physical_optimizer/coalesce_batches.rs index 42b7463600dcb..da0e44c8de4e3 100644 --- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs +++ b/datafusion/core/src/physical_optimizer/coalesce_batches.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use crate::{ config::ConfigOptions, error::Result, - physical_optimizer::PhysicalOptimizerRule, physical_plan::{ coalesce_batches::CoalesceBatchesExec, filter::FilterExec, joins::HashJoinExec, repartition::RepartitionExec, Partitioning, @@ -31,6 +30,7 @@ use crate::{ }; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; /// Optimizer rule that introduces CoalesceBatchesExec to avoid overhead with small batches that /// are produced by highly selective filters diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 56cdbd645285d..29148a594f31c 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -21,13 +21,13 @@ use std::sync::Arc; use crate::error::Result; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use crate::physical_plan::ExecutionPlan; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::{physical_exprs_equal, AggregateExpr, PhysicalExpr}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; /// CombinePartialFinalAggregate optimizer rule combines the adjacent Partial and Final AggregateExecs /// into a Single AggregateExec if their grouping exprs and aggregate exprs equal. 
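For downstream code that implements its own physical optimizer rules, the practical effect of this crate split is an import change only: the trait keeps the same `optimize`/`name`/`schema_check` surface, now defined in `datafusion_physical_optimizer` and still reachable through DataFusion core's `physical_optimizer` module via the re-export added below. A minimal sketch of a custom rule written against the relocated trait might look like the following; `NoOpRule` and its body are illustrative only and are not part of this patch:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::ExecutionPlan;

/// Illustrative rule that leaves every plan untouched.
#[derive(Debug, Default)]
struct NoOpRule;

impl PhysicalOptimizerRule for NoOpRule {
    fn optimize(
        &self,
        plan: Arc<dyn ExecutionPlan>,
        _config: &ConfigOptions,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        // A real rule would rewrite `plan` here; this sketch returns it as-is.
        Ok(plan)
    }

    fn name(&self) -> &str {
        "no_op_rule"
    }

    fn schema_check(&self) -> bool {
        // The rule never alters the plan schema, so the planner's schema check stays enabled.
        true
    }
}
```

Such a rule is registered the same way as before the move, for example through `SessionStateBuilder::with_physical_optimizer_rules`, whose doc comment is also corrected earlier in this series.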
diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 818b2304fe097..afed5dd375351 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -31,7 +31,6 @@ use crate::physical_optimizer::utils::{ add_sort_above_with_check, is_coalesce_partitions, is_repartition, is_sort_preserving_merge, }; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::joins::{ @@ -56,6 +55,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 24306647c6867..e577c5336086a 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -49,7 +49,6 @@ use crate::physical_optimizer::utils::{ is_coalesce_partitions, is_limit, is_repartition, is_sort, is_sort_preserving_merge, is_union, is_window, }; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; @@ -66,6 +65,7 @@ use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -631,6 +631,7 @@ mod tests { use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::{col, Column, NotExpr}; + use datafusion_physical_optimizer::PhysicalOptimizerRule; use rstest::rstest; fn create_test_schema() -> Result { diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 6dfe17632a653..b849df88e4aaf 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -27,7 +27,6 @@ use std::sync::Arc; use crate::config::ConfigOptions; use crate::error::Result; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::joins::utils::{ColumnIndex, JoinFilter}; use crate::physical_plan::joins::{ CrossJoinExec, HashJoinExec, NestedLoopJoinExec, PartitionMode, @@ -42,6 +41,7 @@ use datafusion_common::{internal_err, JoinSide, JoinType}; use datafusion_expr::sort_properties::SortProperties; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; /// The [`JoinSelection`] rule tries to modify a given plan so that it can /// accommodate infinite sources and optimize joins in the plan according to diff --git 
a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs index f9d5a4c186eee..b5d3f432d84d0 100644 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs @@ -20,7 +20,6 @@ use std::sync::Arc; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::AggregateExec; use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; @@ -29,6 +28,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::Itertools; /// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 9ad05bf496e59..582f340151ae5 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -42,4 +42,4 @@ mod utils; #[cfg(test)] pub mod test_utils; -pub use optimizer::PhysicalOptimizerRule; +pub use datafusion_physical_optimizer::*; diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index 2d9744ad23dd3..6449dbea0ddf0 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -17,11 +17,11 @@ //! Physical optimizer traits +use datafusion_physical_optimizer::PhysicalOptimizerRule; use std::sync::Arc; use super::projection_pushdown::ProjectionPushdown; use super::update_aggr_exprs::OptimizeAggregateOrder; -use crate::config::ConfigOptions; use crate::physical_optimizer::aggregate_statistics::AggregateStatistics; use crate::physical_optimizer::coalesce_batches::CoalesceBatches; use crate::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; @@ -32,32 +32,6 @@ use crate::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggr use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_optimizer::sanity_checker::SanityCheckPlan; use crate::physical_optimizer::topk_aggregation::TopKAggregation; -use crate::{error::Result, physical_plan::ExecutionPlan}; - -/// `PhysicalOptimizerRule` transforms one ['ExecutionPlan'] into another which -/// computes the same results, but in a potentially more efficient way. -/// -/// Use [`SessionState::add_physical_optimizer_rule`] to register additional -/// `PhysicalOptimizerRule`s. -/// -/// [`SessionState::add_physical_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.add_physical_optimizer_rule -pub trait PhysicalOptimizerRule { - /// Rewrite `plan` to an optimized form - fn optimize( - &self, - plan: Arc, - config: &ConfigOptions, - ) -> Result>; - - /// A human readable name for this optimizer rule - fn name(&self) -> &str; - - /// A flag to indicate whether the physical planner should valid the rule will not - /// change the schema of the plan after the rewriting. - /// Some of the optimization rules might change the nullable properties of the schema - /// and should disable the schema check. 
- fn schema_check(&self) -> bool; -} /// A rule-based physical optimizer. #[derive(Clone)] diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index 671bb437d5fa2..cb9a0cb90e6c7 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -24,7 +24,6 @@ use std::sync::Arc; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; @@ -32,6 +31,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr::{Distribution, LexRequirement, PhysicalSortRequirement}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::{ExecutionPlanProperties, PlanProperties}; diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 24d9f31687f92..84f898431762b 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -24,7 +24,6 @@ use std::collections::HashMap; use std::sync::Arc; use super::output_requirements::OutputRequirementExec; -use super::PhysicalOptimizerRule; use crate::datasource::physical_plan::CsvExec; use crate::error::Result; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; @@ -55,6 +54,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::Itertools; /// This rule inspects [`ProjectionExec`]'s in the given physical plan and tries to diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index 083b42f7400bc..6e37c3f40ffaf 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -24,7 +24,6 @@ use std::sync::Arc; use crate::error::Result; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::ExecutionPlan; use datafusion_common::config::{ConfigOptions, OptimizerOptions}; @@ -34,6 +33,7 @@ use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supp use datafusion_physical_plan::joins::SymmetricHashJoinExec; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// The SanityCheckPlan rule rejects the following query plans: diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index b754ee75ef3e8..82cf44ad77962 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -19,7 +19,6 @@ use std::sync::Arc; -use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::AggregateExec; use crate::physical_plan::coalesce_batches::CoalesceBatchesExec; use crate::physical_plan::filter::FilterExec; @@ -34,6 +33,7 @@ use 
datafusion_common::Result; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::Itertools; /// An optimizer rule that passes a `limit` hint to aggregations if the whole result is not needed diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index d6b4c33384856..f8edf73e3d2af 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -20,14 +20,13 @@ use std::sync::Arc; -use super::PhysicalOptimizerRule; - use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{plan_datafusion_err, Result}; use datafusion_physical_expr::{ reverse_order_bys, AggregateExpr, EquivalenceProperties, PhysicalSortRequirement, }; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::concat_slices; use datafusion_physical_plan::windows::get_ordered_partition_by_indices; use datafusion_physical_plan::{ diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index efc83d8f6b5c2..0accf9d83516a 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -38,7 +38,6 @@ use crate::logical_expr::{ }; use crate::logical_expr::{Limit, Values}; use crate::physical_expr::{create_physical_expr, create_physical_exprs}; -use crate::physical_optimizer::optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use crate::physical_plan::analyze::AnalyzeExec; use crate::physical_plan::empty::EmptyExec; @@ -91,6 +90,7 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; use async_trait::async_trait; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use futures::{StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; use log::{debug, trace}; diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml new file mode 100644 index 0000000000000..9c0ee61da52a0 --- /dev/null +++ b/datafusion/physical-optimizer/Cargo.toml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "datafusion-physical-optimizer" +description = "DataFusion Physical Optimizer" +keywords = ["datafusion", "query", "optimizer"] +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +datafusion-common = { workspace = true, default-features = true } +datafusion-physical-plan = { workspace = true } diff --git a/datafusion/physical-optimizer/README.md b/datafusion/physical-optimizer/README.md new file mode 100644 index 0000000000000..eb361d3f67792 --- /dev/null +++ b/datafusion/physical-optimizer/README.md @@ -0,0 +1,25 @@ + + +# DataFusion Physical Optimizer + +DataFusion is an extensible query execution framework, written in Rust, +that uses Apache Arrow as its in-memory format. + +This crate contains the physical optimizer for DataFusion. diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs new file mode 100644 index 0000000000000..c5a49216f5fdc --- /dev/null +++ b/datafusion/physical-optimizer/src/lib.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] + +mod optimizer; + +pub use optimizer::PhysicalOptimizerRule; diff --git a/datafusion/physical-optimizer/src/optimizer.rs b/datafusion/physical-optimizer/src/optimizer.rs new file mode 100644 index 0000000000000..885dc4a95b8c6 --- /dev/null +++ b/datafusion/physical-optimizer/src/optimizer.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Physical optimizer traits + +use datafusion_common::config::ConfigOptions; +use datafusion_common::Result; +use datafusion_physical_plan::ExecutionPlan; +use std::sync::Arc; + +/// `PhysicalOptimizerRule` transforms one [`ExecutionPlan`] into another which +/// computes the same results, but in a potentially more efficient way. +/// +/// Use [`SessionState::add_physical_optimizer_rule`] to register additional +/// `PhysicalOptimizerRule`s. +/// +/// [`SessionState::add_physical_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.add_physical_optimizer_rule +pub trait PhysicalOptimizerRule { + /// Rewrite `plan` to an optimized form + fn optimize( + &self, + plan: Arc<dyn ExecutionPlan>, + config: &ConfigOptions, + ) -> Result<Arc<dyn ExecutionPlan>>; + + /// A human readable name for this optimizer rule + fn name(&self) -> &str; + + /// A flag to indicate whether the physical planner should validate that the rule will not + /// change the schema of the plan after the rewriting. + /// Some of the optimization rules might change the nullable properties of the schema + /// and should disable the schema check. + fn schema_check(&self) -> bool; +} From 1b9b35c4a50636a4385433c38665f985679b7527 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Wed, 17 Jul 2024 14:24:52 -0700 Subject: [PATCH 080/357] Minor: Assert `test_enabled_backtrace` requirements to run (#11525) --- datafusion/common/src/error.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 9be662ca283e6..58ff1121e36d0 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -661,11 +661,16 @@ mod test { assert_eq!(res.strip_backtrace(), "Arrow error: Schema error: bar"); } - // RUST_BACKTRACE=1 cargo test --features backtrace --package datafusion-common --lib -- error::test::test_backtrace + // To pass the test, the environment variable RUST_BACKTRACE should be set to 1 to enforce backtrace #[cfg(feature = "backtrace")] #[test] #[allow(clippy::unnecessary_literal_unwrap)] fn test_enabled_backtrace() { + match std::env::var("RUST_BACKTRACE") { + Ok(val) if val == "1" => {} + _ => panic!("Environment variable RUST_BACKTRACE must be set to 1"), + }; + let res: Result<(), DataFusionError> = plan_err!("Err"); let err = res.unwrap_err().to_string(); assert!(err.contains(DataFusionError::BACK_TRACE_SEP)); From d91a03f495b0856c1c2bfe5fc4db2ed1fe5352bf Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Thu, 18 Jul 2024 02:44:15 -0700 Subject: [PATCH 081/357] Move handling of NULL literals in where clause to type coercion pass (#11491) * Revert "Support `NULL` literals in where clause (#11266)" This reverts commit fa0191772e87e04da2598aedb7fe11dd49f88f88.
* Followup Support NULL literals in where clause * misc err change * adopt comparison_coercion * Fix comments * Fix comments --- .../optimizer/src/analyzer/type_coercion.rs | 12 +++++- datafusion/physical-plan/src/filter.rs | 39 +++++-------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 80a8c864e4311..337492d1a55ba 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -84,7 +84,7 @@ impl AnalyzerRule for TypeCoercion { /// Assumes that children have already been optimized fn analyze_internal( external_schema: &DFSchema, - plan: LogicalPlan, + mut plan: LogicalPlan, ) -> Result> { // get schema representing all available input fields. This is used for data type // resolution only, so order does not matter here @@ -103,6 +103,16 @@ fn analyze_internal( // select t2.c2 from t1 where t1.c1 in (select t2.c1 from t2 where t2.c2=t1.c3) schema.merge(external_schema); + if let LogicalPlan::Filter(filter) = &mut plan { + if let Ok(new_predicate) = filter + .predicate + .clone() + .cast_to(&DataType::Boolean, filter.input.schema()) + { + filter.predicate = new_predicate; + } + } + let mut expr_rewrite = TypeCoercionRewriter::new(&schema); let name_preserver = NamePreserver::new(&plan); diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index c5ba3992d3b41..a9d78d059f5c1 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -28,11 +28,11 @@ use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, DisplayFormatType, ExecutionPlan, }; + use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, SchemaRef}; use arrow::record_batch::RecordBatch; -use arrow_array::{Array, BooleanArray}; -use datafusion_common::cast::{as_boolean_array, as_null_array}; +use datafusion_common::cast::as_boolean_array; use datafusion_common::stats::Precision; use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; @@ -81,19 +81,6 @@ impl FilterExec { cache, }) } - DataType::Null => { - let default_selectivity = 0; - let cache = - Self::compute_properties(&input, &predicate, default_selectivity)?; - - Ok(Self { - predicate, - input: Arc::clone(&input), - metrics: ExecutionPlanMetricsSet::new(), - default_selectivity, - cache, - }) - } other => { plan_err!("Filter predicate must return BOOLEAN values, got {other:?}") } @@ -367,23 +354,15 @@ pub(crate) fn batch_filter( .evaluate(batch) .and_then(|v| v.into_array(batch.num_rows())) .and_then(|array| { - let filter_array = match as_boolean_array(&array) { - Ok(boolean_array) => Ok(boolean_array.to_owned()), + Ok(match as_boolean_array(&array) { + // apply filter array to record batch + Ok(filter_array) => filter_record_batch(batch, filter_array)?, Err(_) => { - let Ok(null_array) = as_null_array(&array) else { - return internal_err!( - "Cannot create filter_array from non-boolean predicates" - ); - }; - - // if the predicate is null, then the result is also null - Ok::(BooleanArray::new_null( - null_array.len(), - )) + return internal_err!( + "Cannot create filter_array from non-boolean predicates" + ); } - }?; - - Ok(filter_record_batch(batch, &filter_array)?) 
+ }) }) } From dff2f3c3c637fd5c3b30ed0cf26fac75c22973ac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 18 Jul 2024 06:03:39 -0400 Subject: [PATCH 082/357] Minor: Clarify which parquet options are used for reading/writing (#11511) --- datafusion/common/src/config.rs | 63 ++++++++++--------- .../common/src/file_options/parquet_writer.rs | 1 + .../test_files/information_schema.slt | 52 +++++++-------- docs/source/user-guide/configs.md | 52 +++++++-------- 4 files changed, 87 insertions(+), 81 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 880f0119ce0da..b46b002baac02 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -315,93 +315,96 @@ config_namespace! { } config_namespace! { - /// Options related to parquet files + /// Options for reading and writing parquet files /// /// See also: [`SessionConfig`] /// /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct ParquetOptions { - /// If true, reads the Parquet data page level metadata (the + // The following options affect reading parquet files + + /// (reading) If true, reads the Parquet data page level metadata (the /// Page Index), if present, to reduce the I/O and number of /// rows decoded. pub enable_page_index: bool, default = true - /// If true, the parquet reader attempts to skip entire row groups based + /// (reading) If true, the parquet reader attempts to skip entire row groups based /// on the predicate in the query and the metadata (min/max values) stored in /// the parquet file pub pruning: bool, default = true - /// If true, the parquet reader skip the optional embedded metadata that may be in + /// (reading) If true, the parquet reader skip the optional embedded metadata that may be in /// the file Schema. This setting can help avoid schema conflicts when querying /// multiple parquet files with schemas containing compatible types but different metadata pub skip_metadata: bool, default = true - /// If specified, the parquet reader will try and fetch the last `size_hint` + /// (reading) If specified, the parquet reader will try and fetch the last `size_hint` /// bytes of the parquet file optimistically. If not specified, two reads are required: /// One read to fetch the 8-byte parquet footer and /// another to fetch the metadata length encoded in the footer pub metadata_size_hint: Option, default = None - /// If true, filter expressions are be applied during the parquet decoding operation to + /// (reading) If true, filter expressions are be applied during the parquet decoding operation to /// reduce the number of rows decoded. This optimization is sometimes called "late materialization". pub pushdown_filters: bool, default = false - /// If true, filter expressions evaluated during the parquet decoding operation + /// (reading) If true, filter expressions evaluated during the parquet decoding operation /// will be reordered heuristically to minimize the cost of evaluation. 
If false, /// the filters are applied in the same order as written in the query pub reorder_filters: bool, default = false - // The following map to parquet::file::properties::WriterProperties + // The following options affect writing to parquet files + // and map to parquet::file::properties::WriterProperties - /// Sets best effort maximum size of data page in bytes + /// (writing) Sets best effort maximum size of data page in bytes pub data_pagesize_limit: usize, default = 1024 * 1024 - /// Sets write_batch_size in bytes + /// (writing) Sets write_batch_size in bytes pub write_batch_size: usize, default = 1024 - /// Sets parquet writer version + /// (writing) Sets parquet writer version /// valid values are "1.0" and "2.0" pub writer_version: String, default = "1.0".into() - /// Sets default parquet compression codec + /// (writing) Sets default parquet compression codec. /// Valid values are: uncompressed, snappy, gzip(level), /// lzo, brotli(level), lz4, zstd(level), and lz4_raw. /// These values are not case sensitive. If NULL, uses /// default parquet writer setting pub compression: Option, default = Some("zstd(3)".into()) - /// Sets if dictionary encoding is enabled. If NULL, uses + /// (writing) Sets if dictionary encoding is enabled. If NULL, uses /// default parquet writer setting pub dictionary_enabled: Option, default = None - /// Sets best effort maximum dictionary page size, in bytes + /// (writing) Sets best effort maximum dictionary page size, in bytes pub dictionary_page_size_limit: usize, default = 1024 * 1024 - /// Sets if statistics are enabled for any column + /// (writing) Sets if statistics are enabled for any column /// Valid values are: "none", "chunk", and "page" /// These values are not case sensitive. If NULL, uses /// default parquet writer setting pub statistics_enabled: Option, default = None - /// Sets max statistics size for any column. If NULL, uses + /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting pub max_statistics_size: Option, default = None - /// Target maximum number of rows in each row group (defaults to 1M + /// (writing) Target maximum number of rows in each row group (defaults to 1M /// rows). Writing larger row groups requires more memory to write, but /// can get better compression and be faster to read. pub max_row_group_size: usize, default = 1024 * 1024 - /// Sets "created by" property + /// (writing) Sets "created by" property pub created_by: String, default = concat!("datafusion version ", env!("CARGO_PKG_VERSION")).into() - /// Sets column index truncate length + /// (writing) Sets column index truncate length pub column_index_truncate_length: Option, default = None - /// Sets best effort maximum number of rows in data page + /// (writing) Sets best effort maximum number of rows in data page pub data_page_row_count_limit: usize, default = usize::MAX - /// Sets default encoding for any column + /// (writing) Sets default encoding for any column. /// Valid values are: plain, plain_dictionary, rle, /// bit_packed, delta_binary_packed, delta_length_byte_array, /// delta_byte_array, rle_dictionary, and byte_stream_split. @@ -409,27 +412,27 @@ config_namespace! 
{ /// default parquet writer setting pub encoding: Option, default = None - /// Use any available bloom filters when reading parquet files + /// (writing) Use any available bloom filters when reading parquet files pub bloom_filter_on_read: bool, default = true - /// Write bloom filters for all columns when creating parquet files + /// (writing) Write bloom filters for all columns when creating parquet files pub bloom_filter_on_write: bool, default = false - /// Sets bloom filter false positive probability. If NULL, uses + /// (writing) Sets bloom filter false positive probability. If NULL, uses /// default parquet writer setting pub bloom_filter_fpp: Option, default = None - /// Sets bloom filter number of distinct values. If NULL, uses + /// (writing) Sets bloom filter number of distinct values. If NULL, uses /// default parquet writer setting pub bloom_filter_ndv: Option, default = None - /// Controls whether DataFusion will attempt to speed up writing + /// (writing) Controls whether DataFusion will attempt to speed up writing /// parquet files by serializing them in parallel. Each column /// in each row group in each output file are serialized in parallel /// leveraging a maximum possible core count of n_files*n_row_groups*n_columns. pub allow_single_file_parallelism: bool, default = true - /// By default parallel parquet writer is tuned for minimum + /// (writing) By default parallel parquet writer is tuned for minimum /// memory usage in a streaming execution plan. You may see /// a performance benefit when writing large parquet files /// by increasing maximum_parallel_row_group_writers and @@ -440,7 +443,7 @@ config_namespace! { /// data frame. pub maximum_parallel_row_group_writers: usize, default = 1 - /// By default parallel parquet writer is tuned for minimum + /// (writing) By default parallel parquet writer is tuned for minimum /// memory usage in a streaming execution plan. You may see /// a performance benefit when writing large parquet files /// by increasing maximum_parallel_row_group_writers and @@ -450,7 +453,6 @@ config_namespace! { /// writing out already in-memory data, such as from a cached /// data frame. pub maximum_buffered_record_batches_per_stream: usize, default = 2 - } } @@ -1534,6 +1536,9 @@ macro_rules! config_namespace_with_hashmap { } config_namespace_with_hashmap! { + /// Options controlling parquet format for individual columns. + /// + /// See [`ParquetOptions`] for more details pub struct ColumnOptions { /// Sets if bloom filter is enabled for the column path. 
pub bloom_filter_enabled: Option, default = None diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index dd4bb8ce505ea..abe7db2009a25 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -35,6 +35,7 @@ use parquet::{ /// Options for writing parquet files #[derive(Clone, Debug)] pub struct ParquetWriterOptions { + /// parquet-rs writer properties pub writer_options: WriterProperties, } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 95bea1223a9ce..f7b755b019118 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -262,32 +262,32 @@ datafusion.execution.listing_table_ignore_subdirectory true Should sub directori datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics datafusion.execution.minimum_parallel_output_files 4 Guarantees a minimum level of output files running in parallel. RecordBatches will be distributed in round robin fashion to each parallel writer. Each writer is closed and a new file opened once soft_max_rows_per_output_file is reached. -datafusion.execution.parquet.allow_single_file_parallelism true Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. -datafusion.execution.parquet.bloom_filter_fpp NULL Sets bloom filter false positive probability. If NULL, uses default parquet writer setting -datafusion.execution.parquet.bloom_filter_ndv NULL Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting -datafusion.execution.parquet.bloom_filter_on_read true Use any available bloom filters when reading parquet files -datafusion.execution.parquet.bloom_filter_on_write false Write bloom filters for all columns when creating parquet files -datafusion.execution.parquet.column_index_truncate_length NULL Sets column index truncate length -datafusion.execution.parquet.compression zstd(3) Sets default parquet compression codec Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting -datafusion.execution.parquet.created_by datafusion Sets "created by" property -datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615 Sets best effort maximum number of rows in data page -datafusion.execution.parquet.data_pagesize_limit 1048576 Sets best effort maximum size of data page in bytes -datafusion.execution.parquet.dictionary_enabled NULL Sets if dictionary encoding is enabled. 
If NULL, uses default parquet writer setting -datafusion.execution.parquet.dictionary_page_size_limit 1048576 Sets best effort maximum dictionary page size, in bytes -datafusion.execution.parquet.enable_page_index true If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. -datafusion.execution.parquet.encoding NULL Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting -datafusion.execution.parquet.max_row_group_size 1048576 Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. -datafusion.execution.parquet.max_statistics_size NULL Sets max statistics size for any column. If NULL, uses default parquet writer setting -datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. -datafusion.execution.parquet.maximum_parallel_row_group_writers 1 By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. -datafusion.execution.parquet.metadata_size_hint NULL If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer -datafusion.execution.parquet.pruning true If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file -datafusion.execution.parquet.pushdown_filters false If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". -datafusion.execution.parquet.reorder_filters false If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query -datafusion.execution.parquet.skip_metadata true If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. 
This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata -datafusion.execution.parquet.statistics_enabled NULL Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting -datafusion.execution.parquet.write_batch_size 1024 Sets write_batch_size in bytes -datafusion.execution.parquet.writer_version 1.0 Sets parquet writer version valid values are "1.0" and "2.0" +datafusion.execution.parquet.allow_single_file_parallelism true (writing) Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. +datafusion.execution.parquet.bloom_filter_fpp NULL (writing) Sets bloom filter false positive probability. If NULL, uses default parquet writer setting +datafusion.execution.parquet.bloom_filter_ndv NULL (writing) Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting +datafusion.execution.parquet.bloom_filter_on_read true (writing) Use any available bloom filters when reading parquet files +datafusion.execution.parquet.bloom_filter_on_write false (writing) Write bloom filters for all columns when creating parquet files +datafusion.execution.parquet.column_index_truncate_length NULL (writing) Sets column index truncate length +datafusion.execution.parquet.compression zstd(3) (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting +datafusion.execution.parquet.created_by datafusion (writing) Sets "created by" property +datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615 (writing) Sets best effort maximum number of rows in data page +datafusion.execution.parquet.data_pagesize_limit 1048576 (writing) Sets best effort maximum size of data page in bytes +datafusion.execution.parquet.dictionary_enabled NULL (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting +datafusion.execution.parquet.dictionary_page_size_limit 1048576 (writing) Sets best effort maximum dictionary page size, in bytes +datafusion.execution.parquet.enable_page_index true (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. +datafusion.execution.parquet.encoding NULL (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting +datafusion.execution.parquet.max_row_group_size 1048576 (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. +datafusion.execution.parquet.max_statistics_size NULL (writing) Sets max statistics size for any column. 
If NULL, uses default parquet writer setting +datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. +datafusion.execution.parquet.maximum_parallel_row_group_writers 1 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. +datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer +datafusion.execution.parquet.pruning true (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file +datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". +datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query +datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata +datafusion.execution.parquet.statistics_enabled NULL (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting +datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes +datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer version valid values are "1.0" and "2.0" datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. 
There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 5130b0a56d0e9..8d3ecbc985447 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -49,32 +49,32 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.collect_statistics | false | Should DataFusion collect statistics after listing files | | datafusion.execution.target_partitions | 0 | Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system | | datafusion.execution.time_zone | +00:00 | The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour | -| datafusion.execution.parquet.enable_page_index | true | If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. | -| datafusion.execution.parquet.pruning | true | If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file | -| datafusion.execution.parquet.skip_metadata | true | If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata | -| datafusion.execution.parquet.metadata_size_hint | NULL | If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer | -| datafusion.execution.parquet.pushdown_filters | false | If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". | -| datafusion.execution.parquet.reorder_filters | false | If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query | -| datafusion.execution.parquet.data_pagesize_limit | 1048576 | Sets best effort maximum size of data page in bytes | -| datafusion.execution.parquet.write_batch_size | 1024 | Sets write_batch_size in bytes | -| datafusion.execution.parquet.writer_version | 1.0 | Sets parquet writer version valid values are "1.0" and "2.0" | -| datafusion.execution.parquet.compression | zstd(3) | Sets default parquet compression codec Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.dictionary_enabled | NULL | Sets if dictionary encoding is enabled. 
If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | Sets best effort maximum dictionary page size, in bytes | -| datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.max_statistics_size | NULL | Sets max statistics size for any column. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.max_row_group_size | 1048576 | Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 40.0.0 | Sets "created by" property | -| datafusion.execution.parquet.column_index_truncate_length | NULL | Sets column index truncate length | -| datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | Sets best effort maximum number of rows in data page | -| datafusion.execution.parquet.encoding | NULL | Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.bloom_filter_on_read | true | Use any available bloom filters when reading parquet files | -| datafusion.execution.parquet.bloom_filter_on_write | false | Write bloom filters for all columns when creating parquet files | -| datafusion.execution.parquet.bloom_filter_fpp | NULL | Sets bloom filter false positive probability. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.bloom_filter_ndv | NULL | Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.allow_single_file_parallelism | true | Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. | -| datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | -| datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. 
| +| datafusion.execution.parquet.enable_page_index | true | (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. | +| datafusion.execution.parquet.pruning | true | (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file | +| datafusion.execution.parquet.skip_metadata | true | (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata | +| datafusion.execution.parquet.metadata_size_hint | NULL | (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer | +| datafusion.execution.parquet.pushdown_filters | false | (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". | +| datafusion.execution.parquet.reorder_filters | false | (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query | +| datafusion.execution.parquet.data_pagesize_limit | 1048576 | (writing) Sets best effort maximum size of data page in bytes | +| datafusion.execution.parquet.write_batch_size | 1024 | (writing) Sets write_batch_size in bytes | +| datafusion.execution.parquet.writer_version | 1.0 | (writing) Sets parquet writer version valid values are "1.0" and "2.0" | +| datafusion.execution.parquet.compression | zstd(3) | (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.dictionary_enabled | NULL | (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | +| datafusion.execution.parquet.statistics_enabled | NULL | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.max_statistics_size | NULL | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. 
| +| datafusion.execution.parquet.created_by | datafusion version 40.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.column_index_truncate_length | NULL | (writing) Sets column index truncate length | +| datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | (writing) Sets best effort maximum number of rows in data page | +| datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.bloom_filter_on_read | true | (writing) Use any available bloom filters when reading parquet files | +| datafusion.execution.parquet.bloom_filter_on_write | false | (writing) Write bloom filters for all columns when creating parquet files | +| datafusion.execution.parquet.bloom_filter_fpp | NULL | (writing) Sets bloom filter false positive probability. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.bloom_filter_ndv | NULL | (writing) Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.allow_single_file_parallelism | true | (writing) Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. | +| datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | +| datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.aggregate.scalar_update_factor | 10 | Specifies the threshold for using `ScalarValue`s to update accumulators during high-cardinality aggregations for each input batch. The aggregation is considered high-cardinality if the number of affected groups is greater than or equal to `batch_size / scalar_update_factor`. In such cases, `ScalarValue`s are utilized for updating accumulators, rather than the default batch-slice approach. This can lead to performance improvements. By adjusting the `scalar_update_factor`, you can balance the trade-off between more efficient accumulator updates and the number of groups affected. | | datafusion.execution.planning_concurrency | 0 | Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. 
Defaults to the number of CPU cores on the system | | datafusion.execution.sort_spill_reservation_bytes | 10485760 | Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). | From b19744968770c4ab426d065dec3cc5147534e87a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 18 Jul 2024 06:04:25 -0400 Subject: [PATCH 083/357] Update parquet page pruning code to use the `StatisticsExtractor` (#11483) * Update the parquet code prune_pages_in_one_row_group to use the `StatisticsExtractor` * fix doc * Increase evaluation error counter if error determining data page row counts * Optimize `single_column` --- .../datasource/physical_plan/parquet/mod.rs | 51 +- .../physical_plan/parquet/opener.rs | 4 +- .../physical_plan/parquet/page_filter.rs | 556 ++++++++---------- .../physical_plan/parquet/statistics.rs | 10 + .../core/src/physical_optimizer/pruning.rs | 27 +- 5 files changed, 279 insertions(+), 369 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index ed0fc5f0169ee..1eea4eab8ba20 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use crate::datasource::listing::PartitionedFile; use crate::datasource::physical_plan::file_stream::FileStream; use crate::datasource::physical_plan::{ - parquet::page_filter::PagePruningPredicate, DisplayAs, FileGroupPartitioner, + parquet::page_filter::PagePruningAccessPlanFilter, DisplayAs, FileGroupPartitioner, FileScanConfig, }; use crate::{ @@ -39,13 +39,11 @@ use crate::{ }, }; -use arrow::datatypes::{DataType, SchemaRef}; +use arrow::datatypes::SchemaRef; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; use itertools::Itertools; use log::debug; -use parquet::basic::{ConvertedType, LogicalType}; -use parquet::schema::types::ColumnDescriptor; mod access_plan; mod metrics; @@ -225,7 +223,7 @@ pub struct ParquetExec { /// Optional predicate for pruning row groups (derived from `predicate`) pruning_predicate: Option>, /// Optional predicate for pruning pages (derived from `predicate`) - page_pruning_predicate: Option>, + page_pruning_predicate: Option>, /// Optional hint for the size of the parquet metadata metadata_size_hint: Option, /// Optional user defined parquet file reader factory @@ -381,19 +379,12 @@ impl ParquetExecBuilder { }) .filter(|p| !p.always_true()); - let page_pruning_predicate = predicate.as_ref().and_then(|predicate_expr| { - match PagePruningPredicate::try_new(predicate_expr, file_schema.clone()) { - Ok(pruning_predicate) => Some(Arc::new(pruning_predicate)), - Err(e) => { - debug!( - "Could not create page pruning predicate for '{:?}': {}", - pruning_predicate, e - ); - predicate_creation_errors.add(1); - None - } - } - }); + let page_pruning_predicate = predicate + .as_ref() + .map(|predicate_expr| { + PagePruningAccessPlanFilter::new(predicate_expr, file_schema.clone()) + }) + .map(Arc::new); let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); @@ -739,7 +730,7 @@ impl ExecutionPlan for 
ParquetExec { fn should_enable_page_index( enable_page_index: bool, - page_pruning_predicate: &Option>, + page_pruning_predicate: &Option>, ) -> bool { enable_page_index && page_pruning_predicate.is_some() @@ -749,26 +740,6 @@ fn should_enable_page_index( .unwrap_or(false) } -// Convert parquet column schema to arrow data type, and just consider the -// decimal data type. -pub(crate) fn parquet_to_arrow_decimal_type( - parquet_column: &ColumnDescriptor, -) -> Option { - let type_ptr = parquet_column.self_type_ptr(); - match type_ptr.get_basic_info().logical_type() { - Some(LogicalType::Decimal { scale, precision }) => { - Some(DataType::Decimal128(precision as u8, scale as i8)) - } - _ => match type_ptr.get_basic_info().converted_type() { - ConvertedType::DECIMAL => Some(DataType::Decimal128( - type_ptr.get_precision() as u8, - type_ptr.get_scale() as i8, - )), - _ => None, - }, - } -} - #[cfg(test)] mod tests { // See also `parquet_exec` integration test @@ -798,7 +769,7 @@ mod tests { }; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::RecordBatch; - use arrow_schema::Fields; + use arrow_schema::{DataType, Fields}; use datafusion_common::{assert_contains, ScalarValue}; use datafusion_expr::{col, lit, when, Expr}; use datafusion_physical_expr::planner::logical2physical; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index c97b0282626a7..ffe879eb8de00 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -17,7 +17,7 @@ //! [`ParquetOpener`] for opening Parquet files -use crate::datasource::physical_plan::parquet::page_filter::PagePruningPredicate; +use crate::datasource::physical_plan::parquet::page_filter::PagePruningAccessPlanFilter; use crate::datasource::physical_plan::parquet::row_group_filter::RowGroupAccessPlanFilter; use crate::datasource::physical_plan::parquet::{ row_filter, should_enable_page_index, ParquetAccessPlan, @@ -46,7 +46,7 @@ pub(super) struct ParquetOpener { pub limit: Option, pub predicate: Option>, pub pruning_predicate: Option>, - pub page_pruning_predicate: Option>, + pub page_pruning_predicate: Option>, pub table_schema: SchemaRef, pub metadata_size_hint: Option, pub metrics: ExecutionPlanMetricsSet, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index 7429ca5938203..d658608ab4f1b 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -17,40 +17,33 @@ //! 
Contains code to filter entire pages -use arrow::array::{ - BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array, Int64Array, - StringArray, -}; -use arrow::datatypes::DataType; +use crate::datasource::physical_plan::parquet::ParquetAccessPlan; +use crate::datasource::physical_plan::parquet::StatisticsConverter; +use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; +use arrow::array::BooleanArray; use arrow::{array::ArrayRef, datatypes::SchemaRef}; use arrow_schema::Schema; -use datafusion_common::{Result, ScalarValue}; -use datafusion_physical_expr::expressions::Column; +use datafusion_common::ScalarValue; use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; use log::{debug, trace}; -use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor}; +use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex}; +use parquet::format::PageLocation; +use parquet::schema::types::SchemaDescriptor; use parquet::{ arrow::arrow_reader::{RowSelection, RowSelector}, - file::{ - metadata::{ParquetMetaData, RowGroupMetaData}, - page_index::index::Index, - }, - format::PageLocation, + file::metadata::{ParquetMetaData, RowGroupMetaData}, }; use std::collections::HashSet; use std::sync::Arc; -use crate::datasource::physical_plan::parquet::parquet_to_arrow_decimal_type; -use crate::datasource::physical_plan::parquet::statistics::{ - from_bytes_to_i128, parquet_column, -}; -use crate::datasource::physical_plan::parquet::ParquetAccessPlan; -use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; - use super::metrics::ParquetFileMetrics; -/// A [`PagePruningPredicate`] provides the ability to construct a [`RowSelection`] -/// based on parquet page level statistics, if any +/// Filters a [`ParquetAccessPlan`] based on the [Parquet PageIndex], if present +/// +/// It does so by evaluating statistics from the [`ParquetColumnIndex`] and +/// [`ParquetOffsetIndex`] and converting them to [`RowSelection`]. +/// +/// [Parquet PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md /// /// For example, given a row group with two column (chunks) for `A` /// and `B` with the following with page level statistics: @@ -103,30 +96,52 @@ use super::metrics::ParquetFileMetrics; /// /// So we can entirely skip rows 0->199 and 250->299 as we know they /// can not contain rows that match the predicate. +/// +/// # Implementation notes +/// +/// Single column predicates are evaluated using the PageIndex information +/// for that column to determine which row ranges can be skipped based. +/// +/// The resulting [`RowSelection`]'s are combined into a final +/// row selection that is added to the [`ParquetAccessPlan`]. #[derive(Debug)] -pub struct PagePruningPredicate { +pub struct PagePruningAccessPlanFilter { + /// single column predicates (e.g. (`col = 5`) extracted from the overall + /// predicate. Must all be true for a row to be included in the result. predicates: Vec, } -impl PagePruningPredicate { - /// Create a new [`PagePruningPredicate`] - // TODO: this is infallaible -- it can not return an error - pub fn try_new(expr: &Arc, schema: SchemaRef) -> Result { +impl PagePruningAccessPlanFilter { + /// Create a new [`PagePruningAccessPlanFilter`] from a physical + /// expression. 
+ pub fn new(expr: &Arc, schema: SchemaRef) -> Self { + // extract any single column predicates let predicates = split_conjunction(expr) .into_iter() .filter_map(|predicate| { - match PruningPredicate::try_new(predicate.clone(), schema.clone()) { - Ok(p) - if (!p.always_true()) - && (p.required_columns().n_columns() < 2) => - { - Some(Ok(p)) - } - _ => None, + let pp = + match PruningPredicate::try_new(predicate.clone(), schema.clone()) { + Ok(pp) => pp, + Err(e) => { + debug!("Ignoring error creating page pruning predicate: {e}"); + return None; + } + }; + + if pp.always_true() { + debug!("Ignoring always true page pruning predicate: {predicate}"); + return None; + } + + if pp.required_columns().single_column().is_none() { + debug!("Ignoring multi-column page pruning predicate: {predicate}"); + return None; } + + Some(pp) }) - .collect::>>()?; - Ok(Self { predicates }) + .collect::>(); + Self { predicates } } /// Returns an updated [`ParquetAccessPlan`] by applying predicates to the @@ -136,7 +151,7 @@ impl PagePruningPredicate { mut access_plan: ParquetAccessPlan, arrow_schema: &Schema, parquet_schema: &SchemaDescriptor, - file_metadata: &ParquetMetaData, + parquet_metadata: &ParquetMetaData, file_metrics: &ParquetFileMetrics, ) -> ParquetAccessPlan { // scoped timer updates on drop @@ -146,18 +161,18 @@ impl PagePruningPredicate { } let page_index_predicates = &self.predicates; - let groups = file_metadata.row_groups(); + let groups = parquet_metadata.row_groups(); if groups.is_empty() { return access_plan; } - let (Some(file_offset_indexes), Some(file_page_indexes)) = - (file_metadata.offset_index(), file_metadata.column_index()) - else { - trace!( - "skip page pruning due to lack of indexes. Have offset: {}, column index: {}", - file_metadata.offset_index().is_some(), file_metadata.column_index().is_some() + if parquet_metadata.offset_index().is_none() + || parquet_metadata.column_index().is_none() + { + debug!( + "Can not prune pages due to lack of indexes. 
Have offset: {}, column index: {}", + parquet_metadata.offset_index().is_some(), parquet_metadata.column_index().is_some() ); return access_plan; }; @@ -165,33 +180,39 @@ impl PagePruningPredicate { // track the total number of rows that should be skipped let mut total_skip = 0; + // for each row group specified in the access plan let row_group_indexes = access_plan.row_group_indexes(); - for r in row_group_indexes { + for row_group_index in row_group_indexes { // The selection for this particular row group let mut overall_selection = None; for predicate in page_index_predicates { - // find column index in the parquet schema - let col_idx = find_column_index(predicate, arrow_schema, parquet_schema); - let row_group_metadata = &groups[r]; - - let (Some(rg_page_indexes), Some(rg_offset_indexes), Some(col_idx)) = ( - file_page_indexes.get(r), - file_offset_indexes.get(r), - col_idx, - ) else { - trace!( - "Did not have enough metadata to prune with page indexes, \ - falling back to all rows", - ); - continue; + let column = predicate + .required_columns() + .single_column() + .expect("Page pruning requires single column predicates"); + + let converter = StatisticsConverter::try_new( + column.name(), + arrow_schema, + parquet_schema, + ); + + let converter = match converter { + Ok(converter) => converter, + Err(e) => { + debug!( + "Could not create statistics converter for column {}: {e}", + column.name() + ); + continue; + } }; let selection = prune_pages_in_one_row_group( - row_group_metadata, + row_group_index, predicate, - rg_offset_indexes.get(col_idx), - rg_page_indexes.get(col_idx), - groups[r].column(col_idx).column_descr(), + converter, + parquet_metadata, file_metrics, ); @@ -224,15 +245,15 @@ impl PagePruningPredicate { let rows_skipped = rows_skipped(&overall_selection); trace!("Overall selection from predicate skipped {rows_skipped}: {overall_selection:?}"); total_skip += rows_skipped; - access_plan.scan_selection(r, overall_selection) + access_plan.scan_selection(row_group_index, overall_selection) } else { // Selection skips all rows, so skip the entire row group - let rows_skipped = groups[r].num_rows() as usize; - access_plan.skip(r); + let rows_skipped = groups[row_group_index].num_rows() as usize; + access_plan.skip(row_group_index); total_skip += rows_skipped; trace!( "Overall selection from predicate is empty, \ - skipping all {rows_skipped} rows in row group {r}" + skipping all {rows_skipped} rows in row group {row_group_index}" ); } } @@ -242,7 +263,7 @@ impl PagePruningPredicate { access_plan } - /// Returns the number of filters in the [`PagePruningPredicate`] + /// Returns the number of filters in the [`PagePruningAccessPlanFilter`] pub fn filter_number(&self) -> usize { self.predicates.len() } @@ -266,97 +287,53 @@ fn update_selection( } } -/// Returns the column index in the row parquet schema for the single -/// column of a single column pruning predicate. -/// -/// For example, give the predicate `y > 5` +/// Returns a [`RowSelection`] for the rows in this row group to scan. /// -/// And columns in the RowGroupMetadata like `['x', 'y', 'z']` will -/// return 1. +/// This Row Selection is formed from the page index and the predicate skips row +/// ranges that can be ruled out based on the predicate. 
/// -/// Returns `None` if the column is not found, or if there are no -/// required columns, which is the case for predicate like `abs(i) = -/// 1` which are rewritten to `lit(true)` -/// -/// Panics: -/// -/// If the predicate contains more than one column reference (assumes -/// that `extract_page_index_push_down_predicates` only returns -/// predicate with one col) -fn find_column_index( - predicate: &PruningPredicate, - arrow_schema: &Schema, - parquet_schema: &SchemaDescriptor, -) -> Option { - let mut found_required_column: Option<&Column> = None; - - for required_column_details in predicate.required_columns().iter() { - let column = &required_column_details.0; - if let Some(found_required_column) = found_required_column.as_ref() { - // make sure it is the same name we have seen previously - assert_eq!( - column.name(), - found_required_column.name(), - "Unexpected multi column predicate" - ); - } else { - found_required_column = Some(column); - } - } - - let Some(column) = found_required_column.as_ref() else { - trace!("No column references in pruning predicate"); - return None; - }; - - parquet_column(parquet_schema, arrow_schema, column.name()).map(|x| x.0) -} - -/// Returns a `RowSelection` for the pages in this RowGroup if any -/// rows can be pruned based on the page index +/// Returns `None` if there is an error evaluating the predicate or the required +/// page information is not present. fn prune_pages_in_one_row_group( - group: &RowGroupMetaData, - predicate: &PruningPredicate, - col_offset_indexes: Option<&Vec>, - col_page_indexes: Option<&Index>, - col_desc: &ColumnDescriptor, + row_group_index: usize, + pruning_predicate: &PruningPredicate, + converter: StatisticsConverter<'_>, + parquet_metadata: &ParquetMetaData, metrics: &ParquetFileMetrics, ) -> Option { - let num_rows = group.num_rows() as usize; - let (Some(col_offset_indexes), Some(col_page_indexes)) = - (col_offset_indexes, col_page_indexes) - else { - return None; - }; - - let target_type = parquet_to_arrow_decimal_type(col_desc); - let pruning_stats = PagesPruningStatistics { - col_page_indexes, - col_offset_indexes, - target_type: &target_type, - num_rows_in_row_group: group.num_rows(), - }; + let pruning_stats = + PagesPruningStatistics::try_new(row_group_index, converter, parquet_metadata)?; - let values = match predicate.prune(&pruning_stats) { + // Each element in values is a boolean indicating whether the page may have + // values that match the predicate (true) or could not possibly have values + // that match the predicate (false). + let values = match pruning_predicate.prune(&pruning_stats) { Ok(values) => values, Err(e) => { - // stats filter array could not be built - // return a result which will not filter out any pages debug!("Error evaluating page index predicate values {e}"); metrics.predicate_evaluation_errors.add(1); return None; } }; + // Convert the information of which pages to skip into a RowSelection + // that describes the ranges of rows to skip. 
+ let Some(page_row_counts) = pruning_stats.page_row_counts() else { + debug!( + "Can not determine page row counts for row group {row_group_index}, skipping" + ); + metrics.predicate_evaluation_errors.add(1); + return None; + }; + let mut vec = Vec::with_capacity(values.len()); - let row_vec = create_row_count_in_each_page(col_offset_indexes, num_rows); - assert_eq!(row_vec.len(), values.len()); - let mut sum_row = *row_vec.first().unwrap(); + assert_eq!(page_row_counts.len(), values.len()); + let mut sum_row = *page_row_counts.first().unwrap(); let mut selected = *values.first().unwrap(); trace!("Pruned to {:?} using {:?}", values, pruning_stats); for (i, &f) in values.iter().enumerate().skip(1) { if f == selected { - sum_row += *row_vec.get(i).unwrap(); + sum_row += *page_row_counts.get(i).unwrap(); } else { let selector = if selected { RowSelector::select(sum_row) @@ -364,7 +341,7 @@ fn prune_pages_in_one_row_group( RowSelector::skip(sum_row) }; vec.push(selector); - sum_row = *row_vec.get(i).unwrap(); + sum_row = *page_row_counts.get(i).unwrap(); selected = f; } } @@ -378,206 +355,143 @@ fn prune_pages_in_one_row_group( Some(RowSelection::from(vec)) } -fn create_row_count_in_each_page( - location: &[PageLocation], - num_rows: usize, -) -> Vec { - let mut vec = Vec::with_capacity(location.len()); - location.windows(2).for_each(|x| { - let start = x[0].first_row_index as usize; - let end = x[1].first_row_index as usize; - vec.push(end - start); - }); - vec.push(num_rows - location.last().unwrap().first_row_index as usize); - vec -} - -/// Wraps one col page_index in one rowGroup statistics in a way -/// that implements [`PruningStatistics`] +/// Implement [`PruningStatistics`] for one column's PageIndex (column_index + offset_index) #[derive(Debug)] struct PagesPruningStatistics<'a> { - col_page_indexes: &'a Index, - col_offset_indexes: &'a Vec, - // target_type means the logical type in schema: like 'DECIMAL' is the logical type, but the - // real physical type in parquet file may be `INT32, INT64, FIXED_LEN_BYTE_ARRAY` - target_type: &'a Option, - num_rows_in_row_group: i64, + row_group_index: usize, + row_group_metadatas: &'a [RowGroupMetaData], + converter: StatisticsConverter<'a>, + column_index: &'a ParquetColumnIndex, + offset_index: &'a ParquetOffsetIndex, + page_offsets: &'a Vec, } -// Extract the min or max value calling `func` from page idex -macro_rules! 
get_min_max_values_for_page_index { - ($self:expr, $func:ident) => {{ - match $self.col_page_indexes { - Index::NONE => None, - Index::INT32(index) => { - match $self.target_type { - // int32 to decimal with the precision and scale - Some(DataType::Decimal128(precision, scale)) => { - let vec = &index.indexes; - let vec: Vec> = vec - .iter() - .map(|x| x.$func().and_then(|x| Some(*x as i128))) - .collect(); - Decimal128Array::from(vec) - .with_precision_and_scale(*precision, *scale) - .ok() - .map(|arr| Arc::new(arr) as ArrayRef) - } - _ => { - let vec = &index.indexes; - Some(Arc::new(Int32Array::from_iter( - vec.iter().map(|x| x.$func().cloned()), - ))) - } - } - } - Index::INT64(index) => { - match $self.target_type { - // int64 to decimal with the precision and scale - Some(DataType::Decimal128(precision, scale)) => { - let vec = &index.indexes; - let vec: Vec> = vec - .iter() - .map(|x| x.$func().and_then(|x| Some(*x as i128))) - .collect(); - Decimal128Array::from(vec) - .with_precision_and_scale(*precision, *scale) - .ok() - .map(|arr| Arc::new(arr) as ArrayRef) - } - _ => { - let vec = &index.indexes; - Some(Arc::new(Int64Array::from_iter( - vec.iter().map(|x| x.$func().cloned()), - ))) - } - } - } - Index::FLOAT(index) => { - let vec = &index.indexes; - Some(Arc::new(Float32Array::from_iter( - vec.iter().map(|x| x.$func().cloned()), - ))) - } - Index::DOUBLE(index) => { - let vec = &index.indexes; - Some(Arc::new(Float64Array::from_iter( - vec.iter().map(|x| x.$func().cloned()), - ))) - } - Index::BOOLEAN(index) => { - let vec = &index.indexes; - Some(Arc::new(BooleanArray::from_iter( - vec.iter().map(|x| x.$func().cloned()), - ))) - } - Index::BYTE_ARRAY(index) => match $self.target_type { - Some(DataType::Decimal128(precision, scale)) => { - let vec = &index.indexes; - Decimal128Array::from( - vec.iter() - .map(|x| { - x.$func() - .and_then(|x| Some(from_bytes_to_i128(x.as_ref()))) - }) - .collect::>>(), - ) - .with_precision_and_scale(*precision, *scale) - .ok() - .map(|arr| Arc::new(arr) as ArrayRef) - } - _ => { - let vec = &index.indexes; - let array: StringArray = vec - .iter() - .map(|x| x.$func()) - .map(|x| x.and_then(|x| std::str::from_utf8(x.as_ref()).ok())) - .collect(); - Some(Arc::new(array)) - } - }, - Index::INT96(_) => { - //Todo support these type - None - } - Index::FIXED_LEN_BYTE_ARRAY(index) => match $self.target_type { - Some(DataType::Decimal128(precision, scale)) => { - let vec = &index.indexes; - Decimal128Array::from( - vec.iter() - .map(|x| { - x.$func() - .and_then(|x| Some(from_bytes_to_i128(x.as_ref()))) - }) - .collect::>>(), - ) - .with_precision_and_scale(*precision, *scale) - .ok() - .map(|arr| Arc::new(arr) as ArrayRef) - } - _ => None, - }, - } - }}; +impl<'a> PagesPruningStatistics<'a> { + /// Creates a new [`PagesPruningStatistics`] for a column in a row group, if + /// possible. + /// + /// Returns None if the `parquet_metadata` does not have sufficient + /// information to create the statistics. 
+ fn try_new( + row_group_index: usize, + converter: StatisticsConverter<'a>, + parquet_metadata: &'a ParquetMetaData, + ) -> Option { + let Some(parquet_column_index) = converter.parquet_index() else { + trace!( + "Column {:?} not in parquet file, skipping", + converter.arrow_field() + ); + return None; + }; + + let column_index = parquet_metadata.column_index()?; + let offset_index = parquet_metadata.offset_index()?; + let row_group_metadatas = parquet_metadata.row_groups(); + + let Some(row_group_page_offsets) = offset_index.get(row_group_index) else { + trace!("No page offsets for row group {row_group_index}, skipping"); + return None; + }; + let Some(page_offsets) = row_group_page_offsets.get(parquet_column_index) else { + trace!( + "No page offsets for column {:?} in row group {row_group_index}, skipping", + converter.arrow_field() + ); + return None; + }; + + Some(Self { + row_group_index, + row_group_metadatas, + converter, + column_index, + offset_index, + page_offsets, + }) + } + + /// return the row counts in each data page, if possible. + fn page_row_counts(&self) -> Option> { + let row_group_metadata = self + .row_group_metadatas + .get(self.row_group_index) + // fail fast/panic if row_group_index is out of bounds + .unwrap(); + + let num_rows_in_row_group = row_group_metadata.num_rows() as usize; + + let page_offsets = self.page_offsets; + let mut vec = Vec::with_capacity(page_offsets.len()); + page_offsets.windows(2).for_each(|x| { + let start = x[0].first_row_index as usize; + let end = x[1].first_row_index as usize; + vec.push(end - start); + }); + vec.push(num_rows_in_row_group - page_offsets.last()?.first_row_index as usize); + Some(vec) + } } impl<'a> PruningStatistics for PagesPruningStatistics<'a> { fn min_values(&self, _column: &datafusion_common::Column) -> Option { - get_min_max_values_for_page_index!(self, min) + match self.converter.data_page_mins( + self.column_index, + self.offset_index, + [&self.row_group_index], + ) { + Ok(min_values) => Some(min_values), + Err(e) => { + debug!("Error evaluating data page min values {e}"); + None + } + } } fn max_values(&self, _column: &datafusion_common::Column) -> Option { - get_min_max_values_for_page_index!(self, max) + match self.converter.data_page_maxes( + self.column_index, + self.offset_index, + [&self.row_group_index], + ) { + Ok(min_values) => Some(min_values), + Err(e) => { + debug!("Error evaluating data page max values {e}"); + None + } + } } fn num_containers(&self) -> usize { - self.col_offset_indexes.len() + self.page_offsets.len() } fn null_counts(&self, _column: &datafusion_common::Column) -> Option { - match self.col_page_indexes { - Index::NONE => None, - Index::BOOLEAN(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::INT32(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::INT64(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::FLOAT(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::DOUBLE(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::INT96(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::BYTE_ARRAY(index) => Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), - Index::FIXED_LEN_BYTE_ARRAY(index) => 
Some(Arc::new(Int64Array::from_iter( - index.indexes.iter().map(|x| x.null_count), - ))), + match self.converter.data_page_null_counts( + self.column_index, + self.offset_index, + [&self.row_group_index], + ) { + Ok(null_counts) => Some(Arc::new(null_counts)), + Err(e) => { + debug!("Error evaluating data page null counts {e}"); + None + } } } fn row_counts(&self, _column: &datafusion_common::Column) -> Option { - // see https://github.com/apache/arrow-rs/blob/91f0b1771308609ca27db0fb1d2d49571b3980d8/parquet/src/file/metadata.rs#L979-L982 - - let row_count_per_page = self.col_offset_indexes.windows(2).map(|location| { - Some(location[1].first_row_index - location[0].first_row_index) - }); - - // append the last page row count - let row_count_per_page = row_count_per_page.chain(std::iter::once(Some( - self.num_rows_in_row_group - - self.col_offset_indexes.last().unwrap().first_row_index, - ))); - - Some(Arc::new(Int64Array::from_iter(row_count_per_page))) + match self.converter.data_page_row_counts( + self.offset_index, + self.row_group_metadatas, + [&self.row_group_index], + ) { + Ok(row_counts) => row_counts.map(|a| Arc::new(a) as ArrayRef), + Err(e) => { + debug!("Error evaluating data page row counts {e}"); + None + } + } } fn contained( diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 44e22f778075d..3d250718f736c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -1136,6 +1136,16 @@ pub struct StatisticsConverter<'a> { } impl<'a> StatisticsConverter<'a> { + /// Return the index of the column in the parquet file, if any + pub fn parquet_index(&self) -> Option { + self.parquet_index + } + + /// Return the arrow field of the column in the arrow schema + pub fn arrow_field(&self) -> &'a Field { + self.arrow_field + } + /// Returns a [`UInt64Array`] with row counts for each row group /// /// # Return Value diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index a1ace229985e7..3c18e53497fd1 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -609,6 +609,8 @@ impl PruningPredicate { /// /// This happens if the predicate is a literal `true` and /// literal_guarantees is empty. + /// + /// This can happen when a predicate is simplified to a constant `true` pub fn always_true(&self) -> bool { is_always_true(&self.predicate_expr) && self.literal_guarantees.is_empty() } @@ -736,12 +738,25 @@ impl RequiredColumns { Self::default() } - /// Returns number of unique columns - pub(crate) fn n_columns(&self) -> usize { - self.iter() - .map(|(c, _s, _f)| c) - .collect::>() - .len() + /// Returns Some(column) if this is a single column predicate. + /// + /// Returns None if this is a multi-column predicate. 
+ /// + /// Examples: + /// * `a > 5 OR a < 10` returns `Some(a)` + /// * `a > 5 OR b < 10` returns `None` + /// * `true` returns None + pub(crate) fn single_column(&self) -> Option<&phys_expr::Column> { + if self.columns.windows(2).all(|w| { + // check if all columns are the same (ignoring statistics and field) + let c1 = &w[0].0; + let c2 = &w[1].0; + c1 == c2 + }) { + self.columns.first().map(|r| &r.0) + } else { + None + } } /// Returns an iterator over items in columns (see doc on From be130b46709e084f969b15e7686cddb289a198ff Mon Sep 17 00:00:00 2001 From: Oleks V Date: Thu, 18 Jul 2024 09:53:27 -0700 Subject: [PATCH 084/357] Enable SortMergeJoin LeftAnti filtered fuzz tests (#11535) * Enable LeftAnti filtered fuzz tests * Enable LeftAnti filtered fuzz tests. Add git reference --- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 17dbf3a0ff287..604c1f93e55ea 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -83,7 +83,7 @@ fn less_than_100_join_filter(schema1: Arc, _schema2: Arc) -> Joi } fn col_lt_col_filter(schema1: Arc, schema2: Arc) -> JoinFilter { - let less_than_100 = Arc::new(BinaryExpr::new( + let less_filter = Arc::new(BinaryExpr::new( Arc::new(Column::new("x", 1)), Operator::Lt, Arc::new(Column::new("x", 0)), @@ -99,11 +99,19 @@ fn col_lt_col_filter(schema1: Arc, schema2: Arc) -> JoinFilter { }, ]; let intermediate_schema = Schema::new(vec![ - schema1.field_with_name("x").unwrap().to_owned(), - schema2.field_with_name("x").unwrap().to_owned(), + schema1 + .field_with_name("x") + .unwrap() + .clone() + .with_nullable(true), + schema2 + .field_with_name("x") + .unwrap() + .clone() + .with_nullable(true), ]); - JoinFilter::new(less_than_100, column_indices, intermediate_schema) + JoinFilter::new(less_filter, column_indices, intermediate_schema) } #[tokio::test] @@ -217,6 +225,8 @@ async fn test_semi_join_1k() { #[tokio::test] async fn test_semi_join_1k_filtered() { + // NLJ vs HJ gives wrong result + // Tracked in https://github.com/apache/datafusion/issues/11537 JoinFuzzTestCase::new( make_staggered_batches(1000), make_staggered_batches(1000), @@ -239,17 +249,17 @@ async fn test_anti_join_1k() { .await } -// Test failed for now. 
https://github.com/apache/datafusion/issues/10872 -#[ignore] #[tokio::test] async fn test_anti_join_1k_filtered() { + // NLJ vs HJ gives wrong result + // Tracked in https://github.com/apache/datafusion/issues/11537 JoinFuzzTestCase::new( make_staggered_batches(1000), make_staggered_batches(1000), JoinType::LeftAnti, - Some(Box::new(less_than_100_join_filter)), + Some(Box::new(col_lt_col_filter)), ) - .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) + .run_test(&[JoinTestType::HjSmj], false) .await } @@ -422,12 +432,13 @@ impl JoinFuzzTestCase { let session_config = SessionConfig::new().with_batch_size(*batch_size); let ctx = SessionContext::new_with_config(session_config); let task_ctx = ctx.task_ctx(); - let smj = self.sort_merge_join(); - let smj_collected = collect(smj, task_ctx.clone()).await.unwrap(); let hj = self.hash_join(); let hj_collected = collect(hj, task_ctx.clone()).await.unwrap(); + let smj = self.sort_merge_join(); + let smj_collected = collect(smj, task_ctx.clone()).await.unwrap(); + let nlj = self.nested_loop_join(); let nlj_collected = collect(nlj, task_ctx.clone()).await.unwrap(); @@ -437,11 +448,12 @@ impl JoinFuzzTestCase { let nlj_rows = nlj_collected.iter().fold(0, |acc, b| acc + b.num_rows()); if debug { - println!("The debug is ON. Input data will be saved"); let fuzz_debug = "fuzz_test_debug"; std::fs::remove_dir_all(fuzz_debug).unwrap_or(()); std::fs::create_dir_all(fuzz_debug).unwrap(); let out_dir_name = &format!("{fuzz_debug}/batch_size_{batch_size}"); + println!("The debug is ON. Input data will be saved to {out_dir_name}"); + Self::save_partitioned_batches_as_parquet( &self.input1, out_dir_name, @@ -562,8 +574,7 @@ impl JoinFuzzTestCase { /// Some(Box::new(col_lt_col_filter)), /// ) /// .run_test(&[JoinTestType::HjSmj], false) - /// .await - /// } + /// .await; /// /// let ctx: SessionContext = SessionContext::new(); /// let df = ctx @@ -592,6 +603,7 @@ impl JoinFuzzTestCase { /// ) /// .run_test() /// .await + /// } fn save_partitioned_batches_as_parquet( input: &[RecordBatch], output_dir: &str, From b685e2d4f1f245dd1dbe468b32b115ae99316689 Mon Sep 17 00:00:00 2001 From: JasonLi Date: Fri, 19 Jul 2024 03:22:39 +0800 Subject: [PATCH 085/357] chore: fix typos of expr, functions, optimizer, physical-expr-common, physical-expr, and physical-plan packages (#11538) --- datafusion/expr/src/aggregate_function.rs | 4 ++-- datafusion/expr/src/expr.rs | 6 +++--- datafusion/expr/src/expr_rewriter/mod.rs | 4 ++-- datafusion/expr/src/logical_plan/builder.rs | 6 +++--- datafusion/expr/src/logical_plan/display.rs | 4 ++-- datafusion/expr/src/logical_plan/plan.rs | 6 +++--- datafusion/expr/src/partition_evaluator.rs | 2 +- datafusion/expr/src/signature.rs | 2 +- datafusion/expr/src/type_coercion/binary.rs | 4 ++-- datafusion/expr/src/type_coercion/functions.rs | 2 +- datafusion/expr/src/type_coercion/mod.rs | 2 +- datafusion/expr/src/utils.rs | 2 +- .../src/approx_percentile_cont_with_weight.rs | 2 +- datafusion/functions-array/src/remove.rs | 2 +- datafusion/functions/src/core/arrow_cast.rs | 2 +- datafusion/functions/src/datetime/to_local_time.rs | 4 ++-- datafusion/functions/src/regex/regexpreplace.rs | 2 +- datafusion/functions/src/unicode/substrindex.rs | 8 ++++---- datafusion/optimizer/src/analyzer/subquery.rs | 4 ++-- datafusion/optimizer/src/common_subexpr_eliminate.rs | 4 ++-- .../optimizer/src/decorrelate_predicate_subquery.rs | 2 +- .../src/optimize_projections/required_indices.rs | 2 +- datafusion/optimizer/src/push_down_filter.rs 
| 2 +- .../src/simplify_expressions/expr_simplifier.rs | 4 ++-- .../optimizer/src/unwrap_cast_in_comparison.rs | 2 +- .../src/aggregate/groups_accumulator/accumulate.rs | 4 ++-- datafusion/physical-expr-common/src/aggregate/mod.rs | 4 ++-- datafusion/physical-expr-common/src/binary_map.rs | 4 ++-- .../physical-expr-common/src/expressions/column.rs | 2 +- .../src/aggregate/groups_accumulator/adapter.rs | 2 +- datafusion/physical-expr/src/aggregate/min_max.rs | 2 +- datafusion/physical-expr/src/equivalence/class.rs | 2 +- datafusion/physical-expr/src/expressions/case.rs | 2 +- datafusion/physical-expr/src/expressions/column.rs | 2 +- datafusion/physical-expr/src/expressions/try_cast.rs | 2 +- datafusion/physical-expr/src/utils/guarantee.rs | 2 +- .../physical-plan/src/aggregates/group_values/row.rs | 2 +- datafusion/physical-plan/src/aggregates/mod.rs | 6 +++--- datafusion/physical-plan/src/analyze.rs | 2 +- datafusion/physical-plan/src/display.rs | 2 +- datafusion/physical-plan/src/joins/cross_join.rs | 2 +- .../physical-plan/src/joins/nested_loop_join.rs | 2 +- .../physical-plan/src/joins/sort_merge_join.rs | 2 +- .../physical-plan/src/joins/symmetric_hash_join.rs | 2 +- datafusion/physical-plan/src/joins/utils.rs | 12 ++++++------ datafusion/physical-plan/src/limit.rs | 2 +- datafusion/physical-plan/src/repartition/mod.rs | 6 +++--- datafusion/physical-plan/src/sorts/sort.rs | 8 ++++---- datafusion/physical-plan/src/streaming.rs | 2 +- datafusion/physical-plan/src/test/exec.rs | 2 +- datafusion/physical-plan/src/topk/mod.rs | 8 ++++---- datafusion/physical-plan/src/windows/mod.rs | 2 +- .../physical-plan/src/windows/window_agg_exec.rs | 2 +- datafusion/physical-plan/src/work_table.rs | 2 +- 54 files changed, 89 insertions(+), 89 deletions(-) diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index 3cae78eaed9b6..39b3b4ed3b5a4 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -152,8 +152,8 @@ mod tests { use strum::IntoEnumIterator; #[test] - // Test for AggregateFuncion's Display and from_str() implementations. - // For each variant in AggregateFuncion, it converts the variant to a string + // Test for AggregateFunction's Display and from_str() implementations. + // For each variant in AggregateFunction, it converts the variant to a string // and then back to a variant. The test asserts that the original variant and // the reconstructed variant are the same. This assertion is also necessary for // function suggestion. See https://github.com/apache/datafusion/issues/8082 diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a344e621ddb12..e3620501d9a8f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -109,7 +109,7 @@ use sqlparser::ast::NullTreatment; /// ## Binary Expressions /// /// Exprs implement traits that allow easy to understand construction of more -/// complex expresions. For example, to create `c1 + c2` to add columns "c1" and +/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and /// "c2" together /// /// ``` @@ -1398,7 +1398,7 @@ impl Expr { } Ok(TreeNodeRecursion::Continue) }) - .expect("traversal is infallable"); + .expect("traversal is infallible"); } /// Return all references to columns and their occurrence counts in the expression. 
@@ -1433,7 +1433,7 @@ impl Expr { } Ok(TreeNodeRecursion::Continue) }) - .expect("traversal is infallable"); + .expect("traversal is infallible"); } /// Returns true if there are any column references in this Expr diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 91bec501f4a02..8d460bdc8e7db 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -155,7 +155,7 @@ pub fn unnormalize_col(expr: Expr) -> Expr { }) }) .data() - .expect("Unnormalize is infallable") + .expect("Unnormalize is infallible") } /// Create a Column from the Scalar Expr @@ -201,7 +201,7 @@ pub fn strip_outer_reference(expr: Expr) -> Expr { }) }) .data() - .expect("strip_outer_reference is infallable") + .expect("strip_outer_reference is infallible") } /// Returns plan with expressions coerced to types compatible with diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 4ad3bd5018a45..98e262f0b187b 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -412,14 +412,14 @@ impl LogicalPlanBuilder { /// Add missing sort columns to all downstream projection /// - /// Thus, if you have a LogialPlan that selects A and B and have + /// Thus, if you have a LogicalPlan that selects A and B and have /// not requested a sort by C, this code will add C recursively to /// all input projections. /// /// Adding a new column is not correct if there is a `Distinct` /// node, which produces only distinct values of its /// inputs. Adding a new column to its input will result in - /// potententially different results than with the original column. + /// potentially different results than with the original column. /// /// For example, if the input is like: /// @@ -1763,7 +1763,7 @@ mod tests { .unwrap(); assert_eq!(&expected, plan.schema().as_ref()); - // Note scan of "EMPLOYEE_CSV" is treated as a SQL identifer + // Note scan of "EMPLOYEE_CSV" is treated as a SQL identifier // (and thus normalized to "employee"csv") as well let projection = None; let plan = diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index 81fd03555abb7..343eda056ffe0 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -338,9 +338,9 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { .collect::>() .join(", "); - let elipse = if values.len() > 5 { "..." } else { "" }; + let eclipse = if values.len() > 5 { "..." } else { "" }; - let values_str = format!("{}{}", str_values, elipse); + let values_str = format!("{}{}", str_values, eclipse); json!({ "Node Type": "Values", "Values": values_str diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index bde9655b8a390..48fa6270b2027 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -263,7 +263,7 @@ pub enum LogicalPlan { /// Prepare a statement and find any bind parameters /// (e.g. `?`). This is used to implement SQL-prepared statements. Prepare(Prepare), - /// Data Manipulaton Language (DML): Insert / Update / Delete + /// Data Manipulation Language (DML): Insert / Update / Delete Dml(DmlStatement), /// Data Definition Language (DDL): CREATE / DROP TABLES / VIEWS / SCHEMAS Ddl(DdlStatement), @@ -1598,8 +1598,8 @@ impl LogicalPlan { }) .collect(); - let elipse = if values.len() > 5 { "..." 
} else { "" }; - write!(f, "Values: {}{}", str_values.join(", "), elipse) + let eclipse = if values.len() > 5 { "..." } else { "" }; + write!(f, "Values: {}{}", str_values.join(", "), eclipse) } LogicalPlan::TableScan(TableScan { diff --git a/datafusion/expr/src/partition_evaluator.rs b/datafusion/expr/src/partition_evaluator.rs index 04b6faf55ae1e..a0f0988b4f4e5 100644 --- a/datafusion/expr/src/partition_evaluator.rs +++ b/datafusion/expr/src/partition_evaluator.rs @@ -135,7 +135,7 @@ pub trait PartitionEvaluator: Debug + Send { /// must produce an output column with one output row for every /// input row. /// - /// `num_rows` is requied to correctly compute the output in case + /// `num_rows` is required to correctly compute the output in case /// `values.len() == 0` /// /// Implementing this function is an optimization: certain window diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index fba793dd229d3..eadd7ac2f83fb 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -65,7 +65,7 @@ pub enum Volatility { /// automatically coerces (add casts to) function arguments so they match the type signature. /// /// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query -/// that calles `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically +/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically /// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning. /// /// # Data Types diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 70139aaa4a0cc..e1765b5c3e6ad 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -370,7 +370,7 @@ impl From<&DataType> for TypeCategory { /// The rules in the document provide a clue, but adhering strictly to them doesn't precisely /// align with the behavior of Postgres. Therefore, we've made slight adjustments to the rules /// to better match the behavior of both Postgres and DuckDB. For example, we expect adjusted -/// decimal percision and scale when coercing decimal types. +/// decimal precision and scale when coercing decimal types. pub fn type_union_resolution(data_types: &[DataType]) -> Option { if data_types.is_empty() { return None; @@ -718,7 +718,7 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result { (Int16 | Int32 | Int64, Int8) | (Int32 | Int64, Int16) | (Int64, Int32) | // Left Float is larger than right Float. (Float32 | Float64, Float16) | (Float64, Float32) | - // Left String is larget than right String. + // Left String is larger than right String. (LargeUtf8, Utf8) | // Any left type is wider than a right hand side Null. 
(_, Null) => lhs.clone(), diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index b430b343e4849..ef52a01e0598f 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -646,7 +646,7 @@ mod tests { vec![DataType::UInt8, DataType::UInt16], Some(vec![DataType::UInt8, DataType::UInt16]), ), - // 2 entries, can coerse values + // 2 entries, can coerce values ( vec![DataType::UInt16, DataType::UInt16], vec![DataType::UInt8, DataType::UInt16], diff --git a/datafusion/expr/src/type_coercion/mod.rs b/datafusion/expr/src/type_coercion/mod.rs index 86005da3dafa7..e0d1236aac2d6 100644 --- a/datafusion/expr/src/type_coercion/mod.rs +++ b/datafusion/expr/src/type_coercion/mod.rs @@ -19,7 +19,7 @@ //! //! Coercion is performed automatically by DataFusion when the types //! of arguments passed to a function or needed by operators do not -//! exacty match the types required by that function / operator. In +//! exactly match the types required by that function / operator. In //! this case, DataFusion will attempt to *coerce* the arguments to //! types accepted by the function by inserting CAST operations. //! diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 45155cbd2c271..889aa0952e51e 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1212,7 +1212,7 @@ pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { } } -/// Build state name. State is the intermidiate state of the aggregate function. +/// Build state name. State is the intermediate state of the aggregate function. pub fn format_state_name(name: &str, state_name: &str) -> String { format!("{name}[{state_name}]") } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index a64218c606c4a..0dbea1fb1ff79 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -43,7 +43,7 @@ make_udaf_expr_and_func!( approx_percentile_cont_with_weight_udaf ); -/// APPROX_PERCENTILE_CONT_WITH_WEIGTH aggregate expression +/// APPROX_PERCENTILE_CONT_WITH_WEIGHT aggregate expression pub struct ApproxPercentileContWithWeight { signature: Signature, approx_percentile_cont: ApproxPercentileCont, diff --git a/datafusion/functions-array/src/remove.rs b/datafusion/functions-array/src/remove.rs index 589dd4d0c41c5..0b7cfc283c06f 100644 --- a/datafusion/functions-array/src/remove.rs +++ b/datafusion/functions-array/src/remove.rs @@ -228,7 +228,7 @@ fn array_remove_internal( } } -/// For each element of `list_array[i]`, removed up to `arr_n[i]` occurences +/// For each element of `list_array[i]`, removed up to `arr_n[i]` occurrences /// of `element_array[i]`. 
/// /// The type of each **element** in `list_array` must be the same as the type of diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index 9c410d4e18e89..9227f9e3a2a8c 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -444,7 +444,7 @@ fn is_separator(c: char) -> bool { } #[derive(Debug)] -/// Splits a strings like Dictionary(Int32, Int64) into tokens sutable for parsing +/// Splits a strings like Dictionary(Int32, Int64) into tokens suitable for parsing /// /// For example the string "Timestamp(Nanosecond, None)" would be parsed into: /// diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index c84d1015bd7ee..634e28e6f3930 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -84,7 +84,7 @@ impl ToLocalTimeFunc { let arg_type = time_value.data_type(); match arg_type { DataType::Timestamp(_, None) => { - // if no timezone specificed, just return the input + // if no timezone specified, just return the input Ok(time_value.clone()) } // If has timezone, adjust the underlying time value. The current time value @@ -165,7 +165,7 @@ impl ToLocalTimeFunc { match array.data_type() { Timestamp(_, None) => { - // if no timezone specificed, just return the input + // if no timezone specified, just return the input Ok(time_value.clone()) } Timestamp(Nanosecond, Some(_)) => { diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 378b6ced076c3..d820f991be18e 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -562,7 +562,7 @@ mod tests { #[test] fn test_static_pattern_regexp_replace_pattern_error() { let values = StringArray::from(vec!["abc"; 5]); - // Delibaretely using an invalid pattern to see how the single pattern + // Deliberately using an invalid pattern to see how the single pattern // error is propagated on regexp_replace. 
let patterns = StringArray::from(vec!["["; 5]); let replacements = StringArray::from(vec!["foo"; 5]); diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index a057e42985463..f8ecab9073c42 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -122,15 +122,15 @@ pub fn substr_index(args: &[ArrayRef]) -> Result { let occurrences = usize::try_from(n.unsigned_abs()).unwrap_or(usize::MAX); let length = if n > 0 { - let splitted = string.split(delimiter); - splitted + let split = string.split(delimiter); + split .take(occurrences) .map(|s| s.len() + delimiter.len()) .sum::() - delimiter.len() } else { - let splitted = string.rsplit(delimiter); - splitted + let split = string.rsplit(delimiter); + split .take(occurrences) .map(|s| s.len() + delimiter.len()) .sum::() diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index db39f8f7737d4..9856ea271ca54 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -159,11 +159,11 @@ fn check_inner_plan( let (correlated, _): (Vec<_>, Vec<_>) = split_conjunction(predicate) .into_iter() .partition(|e| e.contains_outer()); - let maybe_unsupport = correlated + let maybe_unsupported = correlated .into_iter() .filter(|expr| !can_pullup_over_aggregation(expr)) .collect::>(); - if is_aggregate && is_scalar && !maybe_unsupport.is_empty() { + if is_aggregate && is_scalar && !maybe_unsupported.is_empty() { return plan_err!( "Correlated column is not allowed in predicate: {predicate}" ); diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index e4b36652974d7..bbf2091c22175 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -248,7 +248,7 @@ impl CommonSubexprEliminate { } /// Rewrites the expression in `exprs_list` with common sub-expressions - /// replaced with a new colum and adds a ProjectionExec on top of `input` + /// replaced with a new column and adds a ProjectionExec on top of `input` /// which computes any replaced common sub-expressions. /// /// Returns a tuple of: @@ -636,7 +636,7 @@ impl CommonSubexprEliminate { /// Returns the window expressions, and the input to the deepest child /// LogicalPlan. 
/// -/// For example, if the input widnow looks like +/// For example, if the input window looks like /// /// ```text /// LogicalPlan::Window(exprs=[a, b, c]) diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 4e3ca7e33a2eb..b6d49490d4379 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -1232,7 +1232,7 @@ mod tests { } #[test] - fn in_subquery_muti_project_subquery_cols() -> Result<()> { + fn in_subquery_multi_project_subquery_cols() -> Result<()> { let table_scan = test_table_scan()?; let subquery_scan = test_table_scan_with_name("sq")?; diff --git a/datafusion/optimizer/src/optimize_projections/required_indices.rs b/datafusion/optimizer/src/optimize_projections/required_indices.rs index 3f32a0c36a9ae..a9a18898c82e5 100644 --- a/datafusion/optimizer/src/optimize_projections/required_indices.rs +++ b/datafusion/optimizer/src/optimize_projections/required_indices.rs @@ -160,7 +160,7 @@ impl RequiredIndicies { (l, r.map_indices(|idx| idx - n)) } - /// Partitions the indicies in this instance into two groups based on the + /// Partitions the indices in this instance into two groups based on the /// given predicate function `f`. fn partition(&self, f: F) -> (Self, Self) where diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 20e2ac07dffd8..33b2883d6ed89 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1913,7 +1913,7 @@ mod tests { assert_optimized_plan_eq(plan, expected) } - /// post-join predicates with columns from both sides are converted to join filterss + /// post-join predicates with columns from both sides are converted to join filters #[test] fn filter_join_on_common_dependent() -> Result<()> { let table_scan = test_table_scan()?; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 8414f39f30607..56556f387d1ba 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -478,7 +478,7 @@ struct ConstEvaluator<'a> { #[allow(dead_code)] /// The simplify result of ConstEvaluator enum ConstSimplifyResult { - // Expr was simplifed and contains the new expression + // Expr was simplified and contains the new expression Simplified(ScalarValue), // Expr was not simplified and original value is returned NotSimplified(ScalarValue), @@ -519,7 +519,7 @@ impl<'a> TreeNodeRewriter for ConstEvaluator<'a> { fn f_up(&mut self, expr: Expr) -> Result> { match self.can_evaluate.pop() { // Certain expressions such as `CASE` and `COALESCE` are short circuiting - // and may not evalute all their sub expressions. Thus if + // and may not evaluate all their sub expressions. 
Thus if // if any error is countered during simplification, return the original // so that normal evaluation can occur Some(true) => { diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 7238dd5bbd97e..e0f50a470d43e 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -893,7 +893,7 @@ mod tests { DataType::Timestamp(TimeUnit::Nanosecond, utc) } - // a dictonary type for storing string tags + // a dictionary type for storing string tags fn dictionary_tag_type() -> DataType { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) } diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs index f109079f6a26b..3fcd570f514e5 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs @@ -410,7 +410,7 @@ pub fn accumulate_indices( }, ); - // handle any remaining bits (after the intial 64) + // handle any remaining bits (after the initial 64) let remainder_bits = bit_chunks.remainder_bits(); group_indices_remainder .iter() @@ -835,7 +835,7 @@ mod test { } } - /// Parallel implementaiton of NullState to check expected values + /// Parallel implementation of NullState to check expected values #[derive(Debug, Default)] struct MockNullState { /// group indices that had values that passed the filter diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 0e245fd0a66aa..7a4a3a6cac4bb 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -346,7 +346,7 @@ impl AggregateExpr for AggregateFunctionExpr { let accumulator = self.fun.create_sliding_accumulator(args)?; // Accumulators that have window frame startings different - // than `UNBOUNDED PRECEDING`, such as `1 PRECEEDING`, need to + // than `UNBOUNDED PRECEDING`, such as `1 PRECEDING`, need to // implement retract_batch method in order to run correctly // currently in DataFusion. // @@ -377,7 +377,7 @@ impl AggregateExpr for AggregateFunctionExpr { // 3. Third sum we add to the state sum value between `[2, 3)` // (`[0, 2)` is already in the state sum). Also we need to // retract values between `[0, 1)` by this way we can obtain sum - // between [1, 3) which is indeed the apropriate range. + // between [1, 3) which is indeed the appropriate range. 
// // When we use `UNBOUNDED PRECEDING` in the query starting // index will always be 0 for the desired range, and hence the diff --git a/datafusion/physical-expr-common/src/binary_map.rs b/datafusion/physical-expr-common/src/binary_map.rs index bff571f5b5be1..23280701013de 100644 --- a/datafusion/physical-expr-common/src/binary_map.rs +++ b/datafusion/physical-expr-common/src/binary_map.rs @@ -355,7 +355,7 @@ where assert_eq!(values.len(), batch_hashes.len()); for (value, &hash) in values.iter().zip(batch_hashes.iter()) { - // hande null value + // handle null value let Some(value) = value else { let payload = if let Some(&(payload, _offset)) = self.null.as_ref() { payload @@ -439,7 +439,7 @@ where // Put the small values into buffer and offsets so it // appears the output array, and store that offset // so the bytes can be compared if needed - let offset = self.buffer.len(); // offset of start fof data + let offset = self.buffer.len(); // offset of start for data self.buffer.append_slice(value); self.offsets.push(O::usize_as(self.buffer.len())); diff --git a/datafusion/physical-expr-common/src/expressions/column.rs b/datafusion/physical-expr-common/src/expressions/column.rs index 956c33d59b204..d972d35b9e4e7 100644 --- a/datafusion/physical-expr-common/src/expressions/column.rs +++ b/datafusion/physical-expr-common/src/expressions/column.rs @@ -80,7 +80,7 @@ impl PhysicalExpr for Column { Ok(input_schema.field(self.index).data_type().clone()) } - /// Decide whehter this expression is nullable, given the schema of the input + /// Decide whether this expression is nullable, given the schema of the input fn nullable(&self, input_schema: &Schema) -> Result { self.bounds_check(input_schema)?; Ok(input_schema.field(self.index).is_nullable()) diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs index 9856e1c989b3e..592c130b69d89 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs @@ -69,7 +69,7 @@ impl AccumulatorState { } } - /// Returns the amount of memory taken by this structre and its accumulator + /// Returns the amount of memory taken by this structure and its accumulator fn size(&self) -> usize { self.accumulator.size() + std::mem::size_of_val(self) diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs index 65bb9e478c3d6..9987e97b38d38 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/physical-expr/src/aggregate/min_max.rs @@ -296,7 +296,7 @@ macro_rules! typed_min_max_batch_string { }}; } -// Statically-typed version of min/max(array) -> ScalarValue for binay types. +// Statically-typed version of min/max(array) -> ScalarValue for binary types. macro_rules! typed_min_max_batch_binary { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = downcast_value!($VALUES, $ARRAYTYPE); diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index e483f935b75c0..ffa58e3853225 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -67,7 +67,7 @@ impl ConstExpr { pub fn new(expr: Arc) -> Self { Self { expr, - // By default, assume constant expressions are not same accross partitions. + // By default, assume constant expressions are not same across partitions. 
across_partitions: false, } } diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index cd73c5cb579c6..7a434c9402292 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -931,7 +931,7 @@ mod tests { } #[test] - fn case_tranform() -> Result<()> { + fn case_transform() -> Result<()> { let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); let when1 = lit("foo"); diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index f6525c7c04626..38779c54607fb 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -67,7 +67,7 @@ impl PhysicalExpr for UnKnownColumn { Ok(DataType::Null) } - /// Decide whehter this expression is nullable, given the schema of the input + /// Decide whether this expression is nullable, given the schema of the input fn nullable(&self, _input_schema: &Schema) -> Result { Ok(true) } diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index 3549a3df83bbb..43b6c993d2b20 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -31,7 +31,7 @@ use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::ColumnarValue; -/// TRY_CAST expression casts an expression to a specific data type and retuns NULL on invalid cast +/// TRY_CAST expression casts an expression to a specific data type and returns NULL on invalid cast #[derive(Debug, Hash)] pub struct TryCastExpr { /// The expression to cast diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 42e5e6fcf3acd..993ff56100630 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -283,7 +283,7 @@ impl<'a> GuaranteeBuilder<'a> { ) } - /// Aggregates a new single column, multi literal term to ths builder + /// Aggregates a new single column, multi literal term to this builder /// combining with previously known guarantees if possible. /// /// # Examples diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 96a12d7b62da5..8c2a4ba5c4977 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -190,7 +190,7 @@ impl GroupValues for GroupValuesRows { let groups_rows = group_values.iter().take(n); let output = self.row_converter.convert_rows(groups_rows)?; // Clear out first n group keys by copying them to a new Rows. - // TODO file some ticket in arrow-rs to make this more efficent? + // TODO file some ticket in arrow-rs to make this more efficient? 
let mut new_group_values = self.row_converter.empty_rows(0, 0); for row in group_values.iter().skip(n) { new_group_values.push(row); diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 5f780f1ff8019..4146dda7641d4 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -75,12 +75,12 @@ pub enum AggregateMode { /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using /// two operators. - /// This mode requires tha the input is a single partition (like Final) + /// This mode requires that the input is a single partition (like Final) Single, /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using /// two operators. - /// This mode requires tha the input is partitioned by group key (like FinalPartitioned) + /// This mode requires that the input is partitioned by group key (like FinalPartitioned) SinglePartitioned, } @@ -733,7 +733,7 @@ impl ExecutionPlan for AggregateExec { // - once expressions will be able to compute their own stats, use it here // - case where we group by on a column for which with have the `distinct` stat // TODO stats: aggr expression: - // - aggregations somtimes also preserve invariants such as min, max... + // - aggregations sometimes also preserve invariants such as min, max... let column_statistics = Statistics::unknown_column(&self.schema()); match self.mode { AggregateMode::Final | AggregateMode::FinalPartitioned diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index b4c1e25e61914..287446328f8de 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -206,7 +206,7 @@ impl ExecutionPlan for AnalyzeExec { } } -/// Creates the ouput of AnalyzeExec as a RecordBatch +/// Creates the output of AnalyzeExec as a RecordBatch fn create_output_batch( verbose: bool, show_statistics: bool, diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 7f4ae5797d97f..0d2653c5c7753 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -236,7 +236,7 @@ enum ShowMetrics { /// Do not show any metrics None, - /// Show aggregrated metrics across partition + /// Show aggregated metrics across partition Aggregated, /// Show full per-partition metrics diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 33a9c061bf31d..8304ddc7331a0 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -578,7 +578,7 @@ mod tests { } #[tokio::test] - async fn test_stats_cartesian_product_with_unknwon_size() { + async fn test_stats_cartesian_product_with_unknown_size() { let left_row_count = 11; let left = Statistics { diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 754e55e496504..f8ca389808500 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -160,7 +160,7 @@ pub struct NestedLoopJoinExec { } impl NestedLoopJoinExec { - /// Try to create a nwe [`NestedLoopJoinExec`] + /// Try to create a new [`NestedLoopJoinExec`] pub fn 
try_new( left: Arc, right: Arc, diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index e9124a72970ae..a03e4a83fd2d8 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -634,7 +634,7 @@ struct SMJStream { pub buffered: SendableRecordBatchStream, /// Current processing record batch of streamed pub streamed_batch: StreamedBatch, - /// Currrent buffered data + /// Current buffered data pub buffered_data: BufferedData, /// (used in outer join) Is current streamed row joined at least once? pub streamed_joined: bool, diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index c23dc2032c4bc..2299b7ff07f13 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -215,7 +215,7 @@ impl SymmetricHashJoinExec { let left_schema = left.schema(); let right_schema = right.schema(); - // Error out if no "on" contraints are given: + // Error out if no "on" constraints are given: if on.is_empty() { return plan_err!( "On constraints in SymmetricHashJoinExec should be non-empty" diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index e3ec242ce8de6..51744730a5a16 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -145,7 +145,7 @@ impl JoinHashMap { pub(crate) type JoinHashMapOffset = (usize, Option); // Macro for traversing chained values with limit. -// Early returns in case of reacing output tuples limit. +// Early returns in case of reaching output tuples limit. macro_rules! chain_traverse { ( $input_indices:ident, $match_indices:ident, $hash_values:ident, $next_chain:ident, @@ -477,7 +477,7 @@ fn offset_ordering( offset: usize, ) -> Vec { match join_type { - // In the case below, right ordering should be offseted with the left + // In the case below, right ordering should be offsetted with the left // side length, since we append the right table to the left table. 
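A simplified illustration of the offset applied below, using bare column indices instead of the actual `PhysicalSortExpr` list (the helper name `offset_right_ordering` is made up for this sketch; the function in the patch is `offset_ordering`):

fn offset_right_ordering(right_sort_columns: &[usize], left_column_count: usize) -> Vec<usize> {
    // Right-side columns are appended after the left side's columns in the
    // join output, so every right-side sort column index shifts by the number
    // of left-side columns.
    right_sort_columns.iter().map(|idx| idx + left_column_count).collect()
}

fn main() {
    // Right table sorted on its columns 0 and 2; left table has 3 columns.
    // In the joined output those sort columns become 3 and 5.
    assert_eq!(offset_right_ordering(&[0, 2], 3), vec![3, 5]);
}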
JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => ordering .iter() @@ -910,7 +910,7 @@ fn estimate_inner_join_cardinality( left_stats: Statistics, right_stats: Statistics, ) -> Option> { - // Immediatedly return if inputs considered as non-overlapping + // Immediately return if inputs considered as non-overlapping if let Some(estimation) = estimate_disjoint_inputs(&left_stats, &right_stats) { return Some(estimation); }; @@ -2419,7 +2419,7 @@ mod tests { ); assert!( absent_outer_estimation.is_none(), - "Expected \"None\" esimated SemiJoin cardinality for absent outer num_rows" + "Expected \"None\" estimated SemiJoin cardinality for absent outer num_rows" ); let absent_inner_estimation = estimate_join_cardinality( @@ -2437,7 +2437,7 @@ mod tests { &join_on, ).expect("Expected non-empty PartialJoinStatistics for SemiJoin with absent inner num_rows"); - assert_eq!(absent_inner_estimation.num_rows, 500, "Expected outer.num_rows esimated SemiJoin cardinality for absent inner num_rows"); + assert_eq!(absent_inner_estimation.num_rows, 500, "Expected outer.num_rows estimated SemiJoin cardinality for absent inner num_rows"); let absent_inner_estimation = estimate_join_cardinality( &JoinType::LeftSemi, @@ -2453,7 +2453,7 @@ mod tests { }, &join_on, ); - assert!(absent_inner_estimation.is_none(), "Expected \"None\" esimated SemiJoin cardinality for absent outer and inner num_rows"); + assert!(absent_inner_estimation.is_none(), "Expected \"None\" estimated SemiJoin cardinality for absent outer and inner num_rows"); Ok(()) } diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 9c77a3d05cc25..f3dad6afabdea 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -393,7 +393,7 @@ impl ExecutionPlan for LocalLimitExec { .. } if nr <= self.fetch => input_stats, // if the input is greater than the limit, the num_row will be greater - // than the limit because the partitions will be limited separatly + // than the limit because the partitions will be limited separately // the statistic Statistics { num_rows: Precision::Exact(nr), diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index e5c506403ff66..4870e9e95eb54 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -1345,8 +1345,8 @@ mod tests { #[tokio::test] // As the hash results might be different on different platforms or - // wiht different compilers, we will compare the same execution with - // and without droping the output stream. + // with different compilers, we will compare the same execution with + // and without dropping the output stream. async fn hash_repartition_with_dropping_output_stream() { let task_ctx = Arc::new(TaskContext::default()); let partitioning = Partitioning::Hash( @@ -1357,7 +1357,7 @@ mod tests { 2, ); - // We first collect the results without droping the output stream. + // We first collect the results without dropping the output stream. 
let input = Arc::new(make_barrier_exec()); let exec = RepartitionExec::try_new( Arc::clone(&input) as Arc, diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 5b99f8bc71617..d576f77d9f742 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -602,7 +602,7 @@ pub fn sort_batch( .collect::>>()?; let indices = if is_multi_column_with_lists(&sort_columns) { - // lex_sort_to_indices doesn't support List with more than one colum + // lex_sort_to_indices doesn't support List with more than one column // https://github.com/apache/arrow-rs/issues/5454 lexsort_to_indices_multi_columns(sort_columns, fetch)? } else { @@ -802,12 +802,12 @@ impl DisplayAs for SortExec { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { let expr = PhysicalSortExpr::format_list(&self.expr); - let preserve_partioning = self.preserve_partitioning; + let preserve_partitioning = self.preserve_partitioning; match self.fetch { Some(fetch) => { - write!(f, "SortExec: TopK(fetch={fetch}), expr=[{expr}], preserve_partitioning=[{preserve_partioning}]",) + write!(f, "SortExec: TopK(fetch={fetch}), expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]",) } - None => write!(f, "SortExec: expr=[{expr}], preserve_partitioning=[{preserve_partioning}]"), + None => write!(f, "SortExec: expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]"), } } } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 5a9035c8dbfc1..e10e5c9a6995a 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -80,7 +80,7 @@ impl StreamingTableExec { if !schema.eq(partition_schema) { debug!( "Target schema does not match with partition schema. \ - Target_schema: {schema:?}. Partiton Schema: {partition_schema:?}" + Target_schema: {schema:?}. Partition Schema: {partition_schema:?}" ); return plan_err!("Mismatch between schema and batches"); } diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index ac4eb1ca9e58f..cf1c0e313733c 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -725,7 +725,7 @@ pub struct PanicExec { schema: SchemaRef, /// Number of output partitions. Each partition will produce this - /// many empty output record batches prior to panicing + /// many empty output record batches prior to panicking batches_until_panics: Vec, cache: PlanProperties, } diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 5366a5707696a..d3f1a4fd96caf 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -94,7 +94,7 @@ pub struct TopK { impl TopK { /// Create a new [`TopK`] that stores the top `k` values, as /// defined by the sort expressions in `expr`. - // TOOD: make a builder or some other nicer API to avoid the + // TODO: make a builder or some other nicer API to avoid the // clippy warning #[allow(clippy::too_many_arguments)] pub fn try_new( @@ -258,7 +258,7 @@ impl TopKMetrics { /// Using the `Row` format handles things such as ascending vs /// descending and nulls first vs nulls last. struct TopKHeap { - /// The maximum number of elemenents to store in this heap. + /// The maximum number of elements to store in this heap. 
k: usize, /// The target number of rows for output batches batch_size: usize, @@ -421,7 +421,7 @@ impl TopKHeap { let num_rows = self.inner.len(); let (new_batch, mut topk_rows) = self.emit_with_state()?; - // clear all old entires in store (this invalidates all + // clear all old entries in store (this invalidates all // store_ids in `inner`) self.store.clear(); @@ -453,7 +453,7 @@ impl TopKHeap { /// Represents one of the top K rows held in this heap. Orders /// according to memcmp of row (e.g. the arrow Row format, but could -/// also be primtive values) +/// also be primitive values) /// /// Reuses allocations to minimize runtime overhead of creating new Vecs #[derive(Debug, PartialEq)] diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 7f794556a2415..5eca7af19d16d 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -805,7 +805,7 @@ mod tests { } #[tokio::test] - async fn test_satisfiy_nullable() -> Result<()> { + async fn test_satisfy_nullable() -> Result<()> { let schema = create_test_schema()?; let params = vec![ ((true, true), (false, false), false), diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index b6330f65e0b72..1d5c6061a0f97 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -126,7 +126,7 @@ impl WindowAggExec { // Get output partitioning: // Because we can have repartitioning using the partition keys this - // would be either 1 or more than 1 depending on the presense of repartitioning. + // would be either 1 or more than 1 depending on the presence of repartitioning. let output_partitioning = input.output_partitioning().clone(); // Determine execution mode: diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index 5f3cf6e2aee8f..ba95640a87c7f 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -225,7 +225,7 @@ mod tests { #[test] fn test_work_table() { let work_table = WorkTable::new(); - // cann't take from empty work_table + // can't take from empty work_table assert!(work_table.take().is_err()); let pool = Arc::new(UnboundedMemoryPool::default()) as _; From 4dd8532e6cd52c480a29a7851c6676a69f261545 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 19 Jul 2024 04:12:17 +0800 Subject: [PATCH 086/357] rm clone (#11532) Signed-off-by: jayzhan211 --- datafusion/optimizer/src/push_down_filter.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 33b2883d6ed89..a22f2e83e2110 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1020,7 +1020,7 @@ impl OptimizerRule for PushDownFilter { /// ``` fn rewrite_projection( predicates: Vec, - projection: Projection, + mut projection: Projection, ) -> Result<(Transformed, Option)> { // A projection is filter-commutable if it do not contain volatile predicates or contain volatile // predicates that are not used in the filter. However, we should re-writes all predicate expressions. @@ -1053,11 +1053,13 @@ fn rewrite_projection( // E.g. 
in `Filter: b\n Projection: a > 1 as b`, we can swap them, but the filter must be "a > 1" let new_filter = LogicalPlan::Filter(Filter::try_new( replace_cols_by_name(expr, &non_volatile_map)?, - Arc::clone(&projection.input), + std::mem::take(&mut projection.input), )?); + projection.input = Arc::new(new_filter); + Ok(( - insert_below(LogicalPlan::Projection(projection), new_filter)?, + Transformed::yes(LogicalPlan::Projection(projection)), conjunction(keep_predicates), )) } From 723a595528e945c0ebc59a62ece2e24e90627764 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 18 Jul 2024 16:32:09 -0400 Subject: [PATCH 087/357] Minor: avoid a clone in type coercion (#11530) * Minor: avoid a clone in type coercion * Fix test --- .../optimizer/src/analyzer/type_coercion.rs | 18 ++++++++---------- datafusion/sqllogictest/test_files/misc.slt | 4 ++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 337492d1a55ba..50fb1b8193ceb 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -84,7 +84,7 @@ impl AnalyzerRule for TypeCoercion { /// Assumes that children have already been optimized fn analyze_internal( external_schema: &DFSchema, - mut plan: LogicalPlan, + plan: LogicalPlan, ) -> Result> { // get schema representing all available input fields. This is used for data type // resolution only, so order does not matter here @@ -103,15 +103,13 @@ fn analyze_internal( // select t2.c2 from t1 where t1.c1 in (select t2.c1 from t2 where t2.c2=t1.c3) schema.merge(external_schema); - if let LogicalPlan::Filter(filter) = &mut plan { - if let Ok(new_predicate) = filter - .predicate - .clone() - .cast_to(&DataType::Boolean, filter.input.schema()) - { - filter.predicate = new_predicate; - } - } + // Coerce filter predicates to boolean (handles `WHERE NULL`) + let plan = if let LogicalPlan::Filter(mut filter) = plan { + filter.predicate = filter.predicate.cast_to(&DataType::Boolean, &schema)?; + LogicalPlan::Filter(filter) + } else { + plan + }; let mut expr_rewrite = TypeCoercionRewriter::new(&schema); diff --git a/datafusion/sqllogictest/test_files/misc.slt b/datafusion/sqllogictest/test_files/misc.slt index 9f4710eb9bcc0..9bd3023b56f75 100644 --- a/datafusion/sqllogictest/test_files/misc.slt +++ b/datafusion/sqllogictest/test_files/misc.slt @@ -30,6 +30,10 @@ query I select 1 where NULL ---- +# Where clause does not accept non boolean and has nice error message +query error Cannot create filter with non\-boolean predicate 'Utf8\("foo"\)' returning Utf8 +select 1 where 'foo' + query I select 1 where NULL and 1 = 1 ---- From 12d82c427d6c37f7884a508707ccd3058a446908 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 19 Jul 2024 11:59:01 +0800 Subject: [PATCH 088/357] Move array `ArrayAgg` to a `UserDefinedAggregate` (#11448) * Add input_nullable to UDAF args StateField/AccumulatorArgs This follows how it done for input_type and only provide a single value. But might need to be changed into a Vec in the future. This is need when we are moving `arrag_agg` to udaf where one of the states nullability will depend on the nullability of the input. 
* Make ArragAgg (not ordered or distinct) into a UDAF * Add roundtrip_expr_api test case * Address PR comments * Propegate input nullability for aggregates * Remove from accumulator args * first draft Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * distinct Signed-off-by: jayzhan211 * fix Signed-off-by: jayzhan211 * address comment Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 Co-authored-by: Emil Ejbyfeldt --- datafusion/core/src/dataframe/mod.rs | 6 +- datafusion/core/src/physical_planner.rs | 28 ++ datafusion/core/tests/dataframe/mod.rs | 6 +- datafusion/expr/src/expr_fn.rs | 12 - .../functions-aggregate/src/array_agg.rs | 261 +++++++++++ datafusion/functions-aggregate/src/lib.rs | 8 +- datafusion/functions-array/src/planner.rs | 12 +- .../physical-expr/src/aggregate/array_agg.rs | 185 -------- .../src/aggregate/array_agg_distinct.rs | 433 ------------------ .../physical-expr/src/aggregate/build_in.rs | 80 +--- datafusion/physical-expr/src/aggregate/mod.rs | 2 - .../physical-expr/src/expressions/mod.rs | 159 ------- .../src/aggregates/no_grouping.rs | 1 + .../proto/src/physical_plan/to_proto.rs | 21 +- .../tests/cases/roundtrip_logical_plan.rs | 4 +- .../sqllogictest/test_files/aggregate.slt | 2 +- 16 files changed, 328 insertions(+), 892 deletions(-) create mode 100644 datafusion/functions-aggregate/src/array_agg.rs delete mode 100644 datafusion/physical-expr/src/aggregate/array_agg.rs delete mode 100644 datafusion/physical-expr/src/aggregate/array_agg_distinct.rs diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index c55b7c752765d..fb28b5c1ab470 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1696,10 +1696,10 @@ mod tests { use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::{ - array_agg, cast, create_udf, expr, lit, BuiltInWindowFunction, - ScalarFunctionImplementation, Volatility, WindowFrame, WindowFunctionDefinition, + cast, create_udf, expr, lit, BuiltInWindowFunction, ScalarFunctionImplementation, + Volatility, WindowFrame, WindowFunctionDefinition, }; - use datafusion_functions_aggregate::expr_fn::count_distinct; + use datafusion_functions_aggregate::expr_fn::{array_agg, count_distinct}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 0accf9d83516a..97533cd5276a5 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1839,7 +1839,34 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; + // TODO: Remove this after array_agg are all udafs let (agg_expr, filter, order_by) = match func_def { + AggregateFunctionDefinition::UDF(udf) + if udf.name() == "ARRAY_AGG" && order_by.is_some() => + { + // not yet support UDAF, fallback to builtin + let physical_sort_exprs = match order_by { + Some(exprs) => Some(create_physical_sort_exprs( + exprs, + logical_input_schema, + execution_props, + )?), + None => None, + }; + let ordering_reqs: Vec = + physical_sort_exprs.clone().unwrap_or(vec![]); + let fun = aggregates::AggregateFunction::ArrayAgg; + let agg_expr = aggregates::create_aggregate_expr( + &fun, + *distinct, + &physical_args, + 
&ordering_reqs, + physical_input_schema, + name, + ignore_nulls, + )?; + (agg_expr, filter, physical_sort_exprs) + } AggregateFunctionDefinition::BuiltIn(fun) => { let physical_sort_exprs = match order_by { Some(exprs) => Some(create_physical_sort_exprs( @@ -1888,6 +1915,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( (agg_expr, filter, physical_sort_exprs) } }; + Ok((agg_expr, filter, order_by)) } other => internal_err!("Invalid aggregate expression '{other:?}'"), diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 9f7bd5227e346..d68b80691917c 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -54,11 +54,11 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{GroupingSet, Sort}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ - array_agg, cast, col, exists, expr, in_subquery, lit, max, out_ref_col, placeholder, + cast, col, exists, expr, in_subquery, lit, max, out_ref_col, placeholder, scalar_subquery, when, wildcard, Expr, ExprSchemable, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; -use datafusion_functions_aggregate::expr_fn::{avg, count, sum}; +use datafusion_functions_aggregate::expr_fn::{array_agg, avg, count, sum}; #[tokio::test] async fn test_count_wildcard_on_sort() -> Result<()> { @@ -1389,7 +1389,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { let expected = vec![ "Projection: shapes.shape_id [shape_id:UInt32]", " Unnest: lists[shape_id2] structs[] [shape_id:UInt32, shape_id2:UInt32;N]", - " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[ARRAY_AGG(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} });N]", + " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[ARRAY_AGG(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]", " TableScan: shapes projection=[shape_id] [shape_id:UInt32]", ]; diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 8b0213fd52fd0..9187e83522052 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -171,18 +171,6 @@ pub fn max(expr: Expr) -> Expr { )) } -/// Create an expression to represent the array_agg() aggregate function -pub fn array_agg(expr: Expr) -> Expr { - Expr::AggregateFunction(AggregateFunction::new( - aggregate_function::AggregateFunction::ArrayAgg, - vec![expr], - false, - None, - None, - None, - )) -} - /// Return a new expression with bitwise AND pub fn bitwise_and(left: Expr, right: Expr) -> Expr { Expr::BinaryExpr(BinaryExpr::new( diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs new file mode 100644 index 0000000000000..9ad453d7a4b22 --- /dev/null +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -0,0 +1,261 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! `ARRAY_AGG` aggregate implementation: [`ArrayAgg`] + +use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::datatypes::DataType; +use arrow_schema::Field; + +use datafusion_common::cast::as_list_array; +use datafusion_common::utils::array_into_list_array_nullable; +use datafusion_common::ScalarValue; +use datafusion_common::{internal_err, Result}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::utils::format_state_name; +use datafusion_expr::AggregateUDFImpl; +use datafusion_expr::{Accumulator, Signature, Volatility}; +use std::collections::HashSet; +use std::sync::Arc; + +make_udaf_expr_and_func!( + ArrayAgg, + array_agg, + expression, + "input values, including nulls, concatenated into an array", + array_agg_udaf +); + +#[derive(Debug)] +/// ARRAY_AGG aggregate expression +pub struct ArrayAgg { + signature: Signature, + alias: Vec, +} + +impl Default for ArrayAgg { + fn default() -> Self { + Self { + signature: Signature::any(1, Volatility::Immutable), + alias: vec!["array_agg".to_string()], + } + } +} + +impl AggregateUDFImpl for ArrayAgg { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + // TODO: change name to lowercase + fn name(&self) -> &str { + "ARRAY_AGG" + } + + fn aliases(&self) -> &[String] { + &self.alias + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(DataType::List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) + } + + fn state_fields(&self, args: StateFieldsArgs) -> Result> { + if args.is_distinct { + return Ok(vec![Field::new_list( + format_state_name(args.name, "distinct_array_agg"), + Field::new("item", args.input_type.clone(), true), + true, + )]); + } + + Ok(vec![Field::new_list( + format_state_name(args.name, "array_agg"), + Field::new("item", args.input_type.clone(), true), + true, + )]) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + if acc_args.is_distinct { + return Ok(Box::new(DistinctArrayAggAccumulator::try_new( + acc_args.input_type, + )?)); + } + + Ok(Box::new(ArrayAggAccumulator::try_new(acc_args.input_type)?)) + } +} + +#[derive(Debug)] +pub struct ArrayAggAccumulator { + values: Vec, + datatype: DataType, +} + +impl ArrayAggAccumulator { + /// new array_agg accumulator based on given item data type + pub fn try_new(datatype: &DataType) -> Result { + Ok(Self { + values: vec![], + datatype: datatype.clone(), + }) + } +} + +impl Accumulator for ArrayAggAccumulator { + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + // Append value like Int64Array(1,2,3) + if values.is_empty() { + return Ok(()); + } + + if values.len() != 1 { + return internal_err!("expects single batch"); + } + + let val = Arc::clone(&values[0]); + if val.len() > 0 { + self.values.push(val); + } + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + // Append value like ListArray(Int64Array(1,2,3), Int64Array(4,5,6)) + if states.is_empty() { + return Ok(()); + } + + if states.len() != 1 { + return internal_err!("expects single state"); + } 
+ + let list_arr = as_list_array(&states[0])?; + for arr in list_arr.iter().flatten() { + self.values.push(arr); + } + Ok(()) + } + + fn state(&mut self) -> Result> { + Ok(vec![self.evaluate()?]) + } + + fn evaluate(&mut self) -> Result { + // Transform Vec to ListArr + let element_arrays: Vec<&dyn Array> = + self.values.iter().map(|a| a.as_ref()).collect(); + + if element_arrays.is_empty() { + return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); + } + + let concated_array = arrow::compute::concat(&element_arrays)?; + let list_array = array_into_list_array_nullable(concated_array); + + Ok(ScalarValue::List(Arc::new(list_array))) + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + + (std::mem::size_of::() * self.values.capacity()) + + self + .values + .iter() + .map(|arr| arr.get_array_memory_size()) + .sum::() + + self.datatype.size() + - std::mem::size_of_val(&self.datatype) + } +} + +#[derive(Debug)] +struct DistinctArrayAggAccumulator { + values: HashSet, + datatype: DataType, +} + +impl DistinctArrayAggAccumulator { + pub fn try_new(datatype: &DataType) -> Result { + Ok(Self { + values: HashSet::new(), + datatype: datatype.clone(), + }) + } +} + +impl Accumulator for DistinctArrayAggAccumulator { + fn state(&mut self) -> Result> { + Ok(vec![self.evaluate()?]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if values.len() != 1 { + return internal_err!("expects single batch"); + } + + let array = &values[0]; + + for i in 0..array.len() { + let scalar = ScalarValue::try_from_array(&array, i)?; + self.values.insert(scalar); + } + + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + if states.is_empty() { + return Ok(()); + } + + if states.len() != 1 { + return internal_err!("expects single state"); + } + + states[0] + .as_list::() + .iter() + .flatten() + .try_for_each(|val| self.update_batch(&[val])) + } + + fn evaluate(&mut self) -> Result { + let values: Vec = self.values.iter().cloned().collect(); + if values.is_empty() { + return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); + } + let arr = ScalarValue::new_list(&values, &self.datatype, true); + Ok(ScalarValue::List(arr)) + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + ScalarValue::size_of_hashset(&self.values) + - std::mem::size_of_val(&self.values) + + self.datatype.size() + - std::mem::size_of_val(&self.datatype) + } +} diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index a3808a08b0074..b39b1955bb07b 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -58,6 +58,7 @@ pub mod macros; pub mod approx_distinct; +pub mod array_agg; pub mod correlation; pub mod count; pub mod covariance; @@ -93,6 +94,7 @@ pub mod expr_fn { pub use super::approx_median::approx_median; pub use super::approx_percentile_cont::approx_percentile_cont; pub use super::approx_percentile_cont_with_weight::approx_percentile_cont_with_weight; + pub use super::array_agg::array_agg; pub use super::average::avg; pub use super::bit_and_or_xor::bit_and; pub use super::bit_and_or_xor::bit_or; @@ -128,6 +130,7 @@ pub mod expr_fn { /// Returns all default aggregate functions pub fn all_default_aggregate_functions() -> Vec> { vec![ + array_agg::array_agg_udaf(), first_last::first_value_udaf(), first_last::last_value_udaf(), covariance::covar_samp_udaf(), @@ -191,8 +194,9 @@ mod tests { let mut names = HashSet::new(); for func in 
all_default_aggregate_functions() { // TODO: remove this - // These functions are in intermediate migration state, skip them - if func.name().to_lowercase() == "count" { + // These functions are in intermidiate migration state, skip them + let name_lower_case = func.name().to_lowercase(); + if name_lower_case == "count" || name_lower_case == "array_agg" { continue; } assert!( diff --git a/datafusion/functions-array/src/planner.rs b/datafusion/functions-array/src/planner.rs index cfbe99b4b7fd4..dfb620f84f3a9 100644 --- a/datafusion/functions-array/src/planner.rs +++ b/datafusion/functions-array/src/planner.rs @@ -19,8 +19,9 @@ use datafusion_common::{utils::list_ndims, DFSchema, Result}; use datafusion_expr::{ + expr::AggregateFunctionDefinition, planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, - sqlparser, AggregateFunction, Expr, ExprSchemable, GetFieldAccess, + sqlparser, Expr, ExprSchemable, GetFieldAccess, }; use datafusion_functions::expr_fn::get_field; use datafusion_functions_aggregate::nth_value::nth_value_udaf; @@ -153,8 +154,9 @@ impl ExprPlanner for FieldAccessPlanner { } fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool { - agg_func.func_def - == datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn( - AggregateFunction::ArrayAgg, - ) + if let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def { + return udf.name() == "ARRAY_AGG"; + } + + false } diff --git a/datafusion/physical-expr/src/aggregate/array_agg.rs b/datafusion/physical-expr/src/aggregate/array_agg.rs deleted file mode 100644 index 0d5ed730e2834..0000000000000 --- a/datafusion/physical-expr/src/aggregate/array_agg.rs +++ /dev/null @@ -1,185 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Defines physical expressions that can evaluated at runtime during query execution - -use crate::aggregate::utils::down_cast_any_ref; -use crate::expressions::format_state_name; -use crate::{AggregateExpr, PhysicalExpr}; -use arrow::array::ArrayRef; -use arrow::datatypes::{DataType, Field}; -use arrow_array::Array; -use datafusion_common::cast::as_list_array; -use datafusion_common::utils::array_into_list_array_nullable; -use datafusion_common::Result; -use datafusion_common::ScalarValue; -use datafusion_expr::Accumulator; -use std::any::Any; -use std::sync::Arc; - -/// ARRAY_AGG aggregate expression -#[derive(Debug)] -pub struct ArrayAgg { - /// Column name - name: String, - /// The DataType for the input expression - input_data_type: DataType, - /// The input expression - expr: Arc, -} - -impl ArrayAgg { - /// Create a new ArrayAgg aggregate function - pub fn new( - expr: Arc, - name: impl Into, - data_type: DataType, - ) -> Self { - Self { - name: name.into(), - input_data_type: data_type, - expr, - } - } -} - -impl AggregateExpr for ArrayAgg { - fn as_any(&self) -> &dyn Any { - self - } - - fn field(&self) -> Result { - Ok(Field::new_list( - &self.name, - // This should be the same as return type of AggregateFunction::ArrayAgg - Field::new("item", self.input_data_type.clone(), true), - true, - )) - } - - fn create_accumulator(&self) -> Result> { - Ok(Box::new(ArrayAggAccumulator::try_new( - &self.input_data_type, - )?)) - } - - fn state_fields(&self) -> Result> { - Ok(vec![Field::new_list( - format_state_name(&self.name, "array_agg"), - Field::new("item", self.input_data_type.clone(), true), - true, - )]) - } - - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] - } - - fn name(&self) -> &str { - &self.name - } -} - -impl PartialEq for ArrayAgg { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.input_data_type == x.input_data_type - && self.expr.eq(&x.expr) - }) - .unwrap_or(false) - } -} - -#[derive(Debug)] -pub(crate) struct ArrayAggAccumulator { - values: Vec, - datatype: DataType, -} - -impl ArrayAggAccumulator { - /// new array_agg accumulator based on given item data type - pub fn try_new(datatype: &DataType) -> Result { - Ok(Self { - values: vec![], - datatype: datatype.clone(), - }) - } -} - -impl Accumulator for ArrayAggAccumulator { - // Append value like Int64Array(1,2,3) - fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - if values.is_empty() { - return Ok(()); - } - assert!(values.len() == 1, "array_agg can only take 1 param!"); - - let val = Arc::clone(&values[0]); - if val.len() > 0 { - self.values.push(val); - } - Ok(()) - } - - // Append value like ListArray(Int64Array(1,2,3), Int64Array(4,5,6)) - fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - if states.is_empty() { - return Ok(()); - } - assert!(states.len() == 1, "array_agg states must be singleton!"); - - let list_arr = as_list_array(&states[0])?; - for arr in list_arr.iter().flatten() { - self.values.push(arr); - } - Ok(()) - } - - fn state(&mut self) -> Result> { - Ok(vec![self.evaluate()?]) - } - - fn evaluate(&mut self) -> Result { - // Transform Vec to ListArr - let element_arrays: Vec<&dyn Array> = - self.values.iter().map(|a| a.as_ref()).collect(); - - if element_arrays.is_empty() { - return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); - } - - let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = 
array_into_list_array_nullable(concated_array); - - Ok(ScalarValue::List(Arc::new(list_array))) - } - - fn size(&self) -> usize { - std::mem::size_of_val(self) - + (std::mem::size_of::() * self.values.capacity()) - + self - .values - .iter() - .map(|arr| arr.get_array_memory_size()) - .sum::() - + self.datatype.size() - - std::mem::size_of_val(&self.datatype) - } -} diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs deleted file mode 100644 index eca6e4ce4f656..0000000000000 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ /dev/null @@ -1,433 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Implementations for DISTINCT expressions, e.g. `COUNT(DISTINCT c)` - -use std::any::Any; -use std::collections::HashSet; -use std::fmt::Debug; -use std::sync::Arc; - -use arrow::array::ArrayRef; -use arrow::datatypes::{DataType, Field}; -use arrow_array::cast::AsArray; - -use crate::aggregate::utils::down_cast_any_ref; -use crate::expressions::format_state_name; -use crate::{AggregateExpr, PhysicalExpr}; - -use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::Accumulator; - -/// Expression for a ARRAY_AGG(DISTINCT) aggregation. 
-#[derive(Debug)] -pub struct DistinctArrayAgg { - /// Column name - name: String, - /// The DataType for the input expression - input_data_type: DataType, - /// The input expression - expr: Arc, -} - -impl DistinctArrayAgg { - /// Create a new DistinctArrayAgg aggregate function - pub fn new( - expr: Arc, - name: impl Into, - input_data_type: DataType, - ) -> Self { - let name = name.into(); - Self { - name, - input_data_type, - expr, - } - } -} - -impl AggregateExpr for DistinctArrayAgg { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - fn field(&self) -> Result { - Ok(Field::new_list( - &self.name, - // This should be the same as return type of AggregateFunction::ArrayAgg - Field::new("item", self.input_data_type.clone(), true), - true, - )) - } - - fn create_accumulator(&self) -> Result> { - Ok(Box::new(DistinctArrayAggAccumulator::try_new( - &self.input_data_type, - )?)) - } - - fn state_fields(&self) -> Result> { - Ok(vec![Field::new_list( - format_state_name(&self.name, "distinct_array_agg"), - Field::new("item", self.input_data_type.clone(), true), - true, - )]) - } - - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] - } - - fn name(&self) -> &str { - &self.name - } -} - -impl PartialEq for DistinctArrayAgg { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.input_data_type == x.input_data_type - && self.expr.eq(&x.expr) - }) - .unwrap_or(false) - } -} - -#[derive(Debug)] -struct DistinctArrayAggAccumulator { - values: HashSet, - datatype: DataType, -} - -impl DistinctArrayAggAccumulator { - pub fn try_new(datatype: &DataType) -> Result { - Ok(Self { - values: HashSet::new(), - datatype: datatype.clone(), - }) - } -} - -impl Accumulator for DistinctArrayAggAccumulator { - fn state(&mut self) -> Result> { - Ok(vec![self.evaluate()?]) - } - - fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - assert_eq!(values.len(), 1, "batch input should only include 1 column!"); - - let array = &values[0]; - - for i in 0..array.len() { - let scalar = ScalarValue::try_from_array(&array, i)?; - self.values.insert(scalar); - } - - Ok(()) - } - - fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - if states.is_empty() { - return Ok(()); - } - - states[0] - .as_list::() - .iter() - .flatten() - .try_for_each(|val| self.update_batch(&[val])) - } - - fn evaluate(&mut self) -> Result { - let values: Vec = self.values.iter().cloned().collect(); - if values.is_empty() { - return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1)); - } - let arr = ScalarValue::new_list(&values, &self.datatype, true); - Ok(ScalarValue::List(arr)) - } - - fn size(&self) -> usize { - std::mem::size_of_val(self) + ScalarValue::size_of_hashset(&self.values) - - std::mem::size_of_val(&self.values) - + self.datatype.size() - - std::mem::size_of_val(&self.datatype) - } -} - -#[cfg(test)] -mod tests { - - use super::*; - use crate::expressions::col; - use crate::expressions::tests::aggregate; - use arrow::array::Int32Array; - use arrow::datatypes::Schema; - use arrow::record_batch::RecordBatch; - use arrow_array::types::Int32Type; - use arrow_array::Array; - use arrow_array::ListArray; - use arrow_buffer::OffsetBuffer; - use datafusion_common::internal_err; - - // arrow::compute::sort can't sort nested ListArray directly, so we compare the scalar values pair-wise. 
- fn compare_list_contents( - expected: Vec, - actual: ScalarValue, - ) -> Result<()> { - let array = actual.to_array()?; - let list_array = array.as_list::(); - let inner_array = list_array.value(0); - let mut actual_scalars = vec![]; - for index in 0..inner_array.len() { - let sv = ScalarValue::try_from_array(&inner_array, index)?; - actual_scalars.push(sv); - } - - if actual_scalars.len() != expected.len() { - return internal_err!( - "Expected and actual list lengths differ: expected={}, actual={}", - expected.len(), - actual_scalars.len() - ); - } - - let mut seen = vec![false; expected.len()]; - for v in expected { - let mut found = false; - for (i, sv) in actual_scalars.iter().enumerate() { - if sv == &v { - seen[i] = true; - found = true; - break; - } - } - if !found { - return internal_err!( - "Expected value {:?} not found in actual values {:?}", - v, - actual_scalars - ); - } - } - - Ok(()) - } - - fn check_distinct_array_agg( - input: ArrayRef, - expected: Vec, - datatype: DataType, - ) -> Result<()> { - let schema = Schema::new(vec![Field::new("a", datatype.clone(), false)]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![input])?; - - let agg = Arc::new(DistinctArrayAgg::new( - col("a", &schema)?, - "bla".to_string(), - datatype, - )); - let actual = aggregate(&batch, agg)?; - compare_list_contents(expected, actual) - } - - fn check_merge_distinct_array_agg( - input1: ArrayRef, - input2: ArrayRef, - expected: Vec, - datatype: DataType, - ) -> Result<()> { - let schema = Schema::new(vec![Field::new("a", datatype.clone(), false)]); - let agg = Arc::new(DistinctArrayAgg::new( - col("a", &schema)?, - "bla".to_string(), - datatype, - )); - - let mut accum1 = agg.create_accumulator()?; - let mut accum2 = agg.create_accumulator()?; - - accum1.update_batch(&[input1])?; - accum2.update_batch(&[input2])?; - - let array = accum2.state()?[0].raw_data()?; - accum1.merge_batch(&[array])?; - - let actual = accum1.evaluate()?; - compare_list_contents(expected, actual) - } - - #[test] - fn distinct_array_agg_i32() -> Result<()> { - let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 4, 5, 2])); - - let expected = vec![ - ScalarValue::Int32(Some(1)), - ScalarValue::Int32(Some(2)), - ScalarValue::Int32(Some(4)), - ScalarValue::Int32(Some(5)), - ScalarValue::Int32(Some(7)), - ]; - - check_distinct_array_agg(col, expected, DataType::Int32) - } - - #[test] - fn merge_distinct_array_agg_i32() -> Result<()> { - let col1: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 4, 5, 2])); - let col2: ArrayRef = Arc::new(Int32Array::from(vec![1, 3, 7, 8, 4])); - - let expected = vec![ - ScalarValue::Int32(Some(1)), - ScalarValue::Int32(Some(2)), - ScalarValue::Int32(Some(3)), - ScalarValue::Int32(Some(4)), - ScalarValue::Int32(Some(5)), - ScalarValue::Int32(Some(7)), - ScalarValue::Int32(Some(8)), - ]; - - check_merge_distinct_array_agg(col1, col2, expected, DataType::Int32) - } - - #[test] - fn distinct_array_agg_nested() -> Result<()> { - // [[1, 2, 3], [4, 5]] - let a1 = ListArray::from_iter_primitive::(vec![Some(vec![ - Some(1), - Some(2), - Some(3), - ])]); - let a2 = ListArray::from_iter_primitive::(vec![Some(vec![ - Some(4), - Some(5), - ])]); - let l1 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([2]), - arrow::compute::concat(&[&a1, &a2]).unwrap(), - None, - ); - - // [[6], [7, 8]] - let a1 = - ListArray::from_iter_primitive::(vec![Some(vec![Some(6)])]); - let a2 = 
ListArray::from_iter_primitive::(vec![Some(vec![ - Some(7), - Some(8), - ])]); - let l2 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([2]), - arrow::compute::concat(&[&a1, &a2]).unwrap(), - None, - ); - - // [[9]] - let a1 = - ListArray::from_iter_primitive::(vec![Some(vec![Some(9)])]); - let l3 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([1]), - Arc::new(a1), - None, - ); - - let l1 = ScalarValue::List(Arc::new(l1)); - let l2 = ScalarValue::List(Arc::new(l2)); - let l3 = ScalarValue::List(Arc::new(l3)); - - // Duplicate l1 and l3 in the input array and check that it is deduped in the output. - let array = ScalarValue::iter_to_array(vec![ - l1.clone(), - l2.clone(), - l3.clone(), - l3.clone(), - l1.clone(), - ]) - .unwrap(); - let expected = vec![l1, l2, l3]; - - check_distinct_array_agg( - array, - expected, - DataType::List(Arc::new(Field::new_list( - "item", - Field::new("item", DataType::Int32, true), - true, - ))), - ) - } - - #[test] - fn merge_distinct_array_agg_nested() -> Result<()> { - // [[1, 2], [3, 4]] - let a1 = ListArray::from_iter_primitive::(vec![Some(vec![ - Some(1), - Some(2), - ])]); - let a2 = ListArray::from_iter_primitive::(vec![Some(vec![ - Some(3), - Some(4), - ])]); - let l1 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([2]), - arrow::compute::concat(&[&a1, &a2]).unwrap(), - None, - ); - - let a1 = - ListArray::from_iter_primitive::(vec![Some(vec![Some(5)])]); - let l2 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([1]), - Arc::new(a1), - None, - ); - - // [[6, 7], [8]] - let a1 = ListArray::from_iter_primitive::(vec![Some(vec![ - Some(6), - Some(7), - ])]); - let a2 = - ListArray::from_iter_primitive::(vec![Some(vec![Some(8)])]); - let l3 = ListArray::new( - Arc::new(Field::new("item", a1.data_type().to_owned(), true)), - OffsetBuffer::from_lengths([2]), - arrow::compute::concat(&[&a1, &a2]).unwrap(), - None, - ); - - let l1 = ScalarValue::List(Arc::new(l1)); - let l2 = ScalarValue::List(Arc::new(l2)); - let l3 = ScalarValue::List(Arc::new(l3)); - - // Duplicate l1 in the input array and check that it is deduped in the output. 
- let input1 = ScalarValue::iter_to_array(vec![l1.clone(), l2.clone()]).unwrap(); - let input2 = ScalarValue::iter_to_array(vec![l1.clone(), l3.clone()]).unwrap(); - - let expected = vec![l1, l2, l3]; - - check_merge_distinct_array_agg(input1, input2, expected, DataType::Int32) - } -} diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index ef21b3d0f7883..9c270561f37d2 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -30,7 +30,7 @@ use std::sync::Arc; use arrow::datatypes::Schema; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::AggregateFunction; use crate::expressions::{self}; @@ -60,11 +60,13 @@ pub fn create_aggregate_expr( .collect::>>()?; let input_phy_exprs = input_phy_exprs.to_vec(); Ok(match (fun, distinct) { - (AggregateFunction::ArrayAgg, false) => { + (AggregateFunction::ArrayAgg, _) => { let expr = Arc::clone(&input_phy_exprs[0]); if ordering_req.is_empty() { - Arc::new(expressions::ArrayAgg::new(expr, name, data_type)) + return internal_err!( + "ArrayAgg without ordering should be handled as UDAF" + ); } else { Arc::new(expressions::OrderSensitiveArrayAgg::new( expr, @@ -75,15 +77,6 @@ pub fn create_aggregate_expr( )) } } - (AggregateFunction::ArrayAgg, true) => { - if !ordering_req.is_empty() { - return not_impl_err!( - "ARRAY_AGG(DISTINCT ORDER BY a ASC) order-sensitive aggregations are not available" - ); - } - let expr = Arc::clone(&input_phy_exprs[0]); - Arc::new(expressions::DistinctArrayAgg::new(expr, name, data_type)) - } (AggregateFunction::Min, _) => Arc::new(expressions::Min::new( Arc::clone(&input_phy_exprs[0]), name, @@ -104,70 +97,9 @@ mod tests { use datafusion_common::plan_err; use datafusion_expr::{type_coercion, Signature}; - use crate::expressions::{try_cast, ArrayAgg, DistinctArrayAgg, Max, Min}; + use crate::expressions::{try_cast, Max, Min}; use super::*; - #[test] - fn test_approx_expr() -> Result<()> { - let funcs = vec![AggregateFunction::ArrayAgg]; - let data_types = vec![ - DataType::UInt32, - DataType::Int32, - DataType::Float32, - DataType::Float64, - DataType::Decimal128(10, 2), - DataType::Utf8, - ]; - for fun in funcs { - for data_type in &data_types { - let input_schema = - Schema::new(vec![Field::new("c1", data_type.clone(), true)]); - let input_phy_exprs: Vec> = vec![Arc::new( - expressions::Column::new_with_schema("c1", &input_schema).unwrap(), - )]; - let result_agg_phy_exprs = create_physical_agg_expr_for_test( - &fun, - false, - &input_phy_exprs[0..1], - &input_schema, - "c1", - )?; - if fun == AggregateFunction::ArrayAgg { - assert!(result_agg_phy_exprs.as_any().is::()); - assert_eq!("c1", result_agg_phy_exprs.name()); - assert_eq!( - Field::new_list( - "c1", - Field::new("item", data_type.clone(), true), - true, - ), - result_agg_phy_exprs.field().unwrap() - ); - } - - let result_distinct = create_physical_agg_expr_for_test( - &fun, - true, - &input_phy_exprs[0..1], - &input_schema, - "c1", - )?; - if fun == AggregateFunction::ArrayAgg { - assert!(result_distinct.as_any().is::()); - assert_eq!("c1", result_distinct.name()); - assert_eq!( - Field::new_list( - "c1", - Field::new("item", data_type.clone(), true), - true, - ), - result_agg_phy_exprs.field().unwrap() - ); - } - } - } - Ok(()) - } #[test] fn test_min_max_expr() -> Result<()> { diff --git a/datafusion/physical-expr/src/aggregate/mod.rs 
b/datafusion/physical-expr/src/aggregate/mod.rs index b9d803900f53a..749cf2be7297c 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -17,8 +17,6 @@ pub use datafusion_physical_expr_common::aggregate::AggregateExpr; -pub(crate) mod array_agg; -pub(crate) mod array_agg_distinct; pub(crate) mod array_agg_ordered; #[macro_use] pub(crate) mod min_max; diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 7d8f12091f46e..fa80bc9873f04 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -34,8 +34,6 @@ mod try_cast; pub mod helpers { pub use crate::aggregate::min_max::{max, min}; } -pub use crate::aggregate::array_agg::ArrayAgg; -pub use crate::aggregate::array_agg_distinct::DistinctArrayAgg; pub use crate::aggregate::array_agg_ordered::OrderSensitiveArrayAgg; pub use crate::aggregate::build_in::create_aggregate_expr; pub use crate::aggregate::min_max::{Max, MaxAccumulator, Min, MinAccumulator}; @@ -63,160 +61,3 @@ pub use negative::{negative, NegativeExpr}; pub use no_op::NoOp; pub use not::{not, NotExpr}; pub use try_cast::{try_cast, TryCastExpr}; - -#[cfg(test)] -pub(crate) mod tests { - use std::sync::Arc; - - use crate::AggregateExpr; - - use arrow::record_batch::RecordBatch; - use datafusion_common::{Result, ScalarValue}; - - /// macro to perform an aggregation using [`datafusion_expr::Accumulator`] and verify the - /// result. - #[macro_export] - macro_rules! generic_test_op { - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr) => { - generic_test_op!($ARRAY, $DATATYPE, $OP, $EXPECTED, $EXPECTED.data_type()) - }; - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{ - let schema = Schema::new(vec![Field::new("a", $DATATYPE, true)]); - - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![$ARRAY])?; - - let agg = Arc::new(<$OP>::new( - col("a", &schema)?, - "bla".to_string(), - $EXPECTED_DATATYPE, - )); - let actual = aggregate(&batch, agg)?; - let expected = ScalarValue::from($EXPECTED); - - assert_eq!(expected, actual); - - Ok(()) as Result<(), ::datafusion_common::DataFusionError> - }}; - } - - /// Same as [`generic_test_op`] but with support for providing a 4th argument, usually - /// a boolean to indicate if using the distinct version of the op. - #[macro_export] - macro_rules! generic_test_distinct_op { - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $DISTINCT:expr, $EXPECTED:expr) => { - generic_test_distinct_op!( - $ARRAY, - $DATATYPE, - $OP, - $DISTINCT, - $EXPECTED, - $EXPECTED.data_type() - ) - }; - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $DISTINCT:expr, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{ - let schema = Schema::new(vec![Field::new("a", $DATATYPE, true)]); - - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![$ARRAY])?; - - let agg = Arc::new(<$OP>::new( - col("a", &schema)?, - "bla".to_string(), - $EXPECTED_DATATYPE, - $DISTINCT, - )); - let actual = aggregate(&batch, agg)?; - let expected = ScalarValue::from($EXPECTED); - - assert_eq!(expected, actual); - - Ok(()) as Result<(), ::datafusion_common::DataFusionError> - }}; - } - - /// macro to perform an aggregation using [`crate::GroupsAccumulator`] and verify the result. 
- /// - /// The difference between this and the above `generic_test_op` is that the former checks - /// the old slow-path [`datafusion_expr::Accumulator`] implementation, while this checks - /// the new [`crate::GroupsAccumulator`] implementation. - #[macro_export] - macro_rules! generic_test_op_new { - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr) => { - generic_test_op_new!( - $ARRAY, - $DATATYPE, - $OP, - $EXPECTED, - $EXPECTED.data_type().clone() - ) - }; - ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{ - let schema = Schema::new(vec![Field::new("a", $DATATYPE, true)]); - - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![$ARRAY])?; - - let agg = Arc::new(<$OP>::new( - col("a", &schema)?, - "bla".to_string(), - $EXPECTED_DATATYPE, - )); - let actual = aggregate_new(&batch, agg)?; - assert_eq!($EXPECTED, &actual); - - Ok(()) as Result<(), ::datafusion_common::DataFusionError> - }}; - } - - /// macro to perform an aggregation with two inputs and verify the result. - #[macro_export] - macro_rules! generic_test_op2 { - ($ARRAY1:expr, $ARRAY2:expr, $DATATYPE1:expr, $DATATYPE2:expr, $OP:ident, $EXPECTED:expr) => { - generic_test_op2!( - $ARRAY1, - $ARRAY2, - $DATATYPE1, - $DATATYPE2, - $OP, - $EXPECTED, - $EXPECTED.data_type() - ) - }; - ($ARRAY1:expr, $ARRAY2:expr, $DATATYPE1:expr, $DATATYPE2:expr, $OP:ident, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{ - let schema = Schema::new(vec![ - Field::new("a", $DATATYPE1, true), - Field::new("b", $DATATYPE2, true), - ]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![$ARRAY1, $ARRAY2])?; - - let agg = Arc::new(<$OP>::new( - col("a", &schema)?, - col("b", &schema)?, - "bla".to_string(), - $EXPECTED_DATATYPE, - )); - let actual = aggregate(&batch, agg)?; - let expected = ScalarValue::from($EXPECTED); - - assert_eq!(expected, actual); - - Ok(()) - }}; - } - - pub fn aggregate( - batch: &RecordBatch, - agg: Arc, - ) -> Result { - let mut accum = agg.create_accumulator()?; - let expr = agg.expressions(); - let values = expr - .iter() - .map(|e| { - e.evaluate(batch) - .and_then(|v| v.into_array(batch.num_rows())) - }) - .collect::>>()?; - accum.update_batch(&values)?; - accum.evaluate() - } -} diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs index f85164f7f1e2f..99417e4ee3e91 100644 --- a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -218,6 +218,7 @@ fn aggregate_batch( Some(filter) => Cow::Owned(batch_filter(&batch, filter)?), None => Cow::Borrowed(&batch), }; + // 1.3 let values = &expr .iter() diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 7ea2902cf3c09..e9a90fce2663f 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -23,10 +23,9 @@ use datafusion::datasource::file_format::parquet::ParquetSink; use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr}; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ - ArrayAgg, BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, DistinctArrayAgg, - InListExpr, IsNotNullExpr, IsNullExpr, Literal, Max, Min, NegativeExpr, NotExpr, - NthValue, Ntile, OrderSensitiveArrayAgg, Rank, RankType, RowNumber, TryCastExpr, - WindowShift, + BinaryExpr, CaseExpr, CastExpr, 
Column, CumeDist, InListExpr, IsNotNullExpr, + IsNullExpr, Literal, Max, Min, NegativeExpr, NotExpr, NthValue, Ntile, + OrderSensitiveArrayAgg, Rank, RankType, RowNumber, TryCastExpr, WindowShift, }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; @@ -260,14 +259,9 @@ struct AggrFn { fn aggr_expr_to_aggr_fn(expr: &dyn AggregateExpr) -> Result { let aggr_expr = expr.as_any(); - let mut distinct = false; - let inner = if aggr_expr.downcast_ref::().is_some() { - protobuf::AggregateFunction::ArrayAgg - } else if aggr_expr.downcast_ref::().is_some() { - distinct = true; - protobuf::AggregateFunction::ArrayAgg - } else if aggr_expr.downcast_ref::().is_some() { + // TODO: remove OrderSensitiveArrayAgg + let inner = if aggr_expr.downcast_ref::().is_some() { protobuf::AggregateFunction::ArrayAgg } else if aggr_expr.downcast_ref::().is_some() { protobuf::AggregateFunction::Min @@ -277,7 +271,10 @@ fn aggr_expr_to_aggr_fn(expr: &dyn AggregateExpr) -> Result { return not_impl_err!("Aggregate function not supported: {expr:?}"); }; - Ok(AggrFn { inner, distinct }) + Ok(AggrFn { + inner, + distinct: false, + }) } pub fn serialize_physical_sort_exprs( diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 0117502f400d2..11945f39589a7 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -66,7 +66,7 @@ use datafusion_expr::{ }; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::expr_fn::{ - avg, bit_and, bit_or, bit_xor, bool_and, bool_or, corr, + array_agg, avg, bit_and, bit_or, bit_xor, bool_and, bool_or, corr, }; use datafusion_functions_aggregate::string_agg::string_agg; use datafusion_proto::bytes::{ @@ -702,6 +702,8 @@ async fn roundtrip_expr_api() -> Result<()> { string_agg(col("a").cast_to(&DataType::Utf8, &schema)?, lit("|")), bool_and(lit(true)), bool_or(lit(true)), + array_agg(lit(1)), + array_agg(lit(1)).distinct().build().unwrap(), ]; // ensure expressions created with the expr api can be round tripped diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index a0140b1c5292a..1976951b8ce66 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -183,7 +183,7 @@ CREATE TABLE array_agg_distinct_list_table AS VALUES ; # Apply array_sort to have deterministic result, higher dimension nested array also works but not for array sort, -# so they are covered in `datafusion/physical-expr/src/aggregate/array_agg_distinct.rs` +# so they are covered in `datafusion/functions-aggregate/src/array_agg.rs` query ?? 
select array_sort(c1), array_sort(c2) from ( select array_agg(distinct column1) as c1, array_agg(distinct column2) as c2 from array_agg_distinct_list_table From 5f0dfbb8e7424964303b00f4781f8df4f445d928 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 19 Jul 2024 17:32:24 +0800 Subject: [PATCH 089/357] Move `MAKE_MAP` to ExprPlanner (#11452) * move make_map to ExprPlanner * add benchmark for make_map * remove todo comment * update lock * refactor plan_make_map * implement make_array_strict for type checking strictly * fix planner provider * roll back to `make_array` * update lock --- datafusion-cli/Cargo.lock | 9 +- datafusion/expr/src/planner.rs | 7 + datafusion/functions-array/Cargo.toml | 5 + datafusion/functions-array/benches/map.rs | 69 ++++++++++ datafusion/functions-array/src/planner.rs | 21 ++- datafusion/functions/benches/map.rs | 23 +--- datafusion/functions/src/core/map.rs | 149 +-------------------- datafusion/functions/src/core/mod.rs | 6 - datafusion/sql/src/expr/function.rs | 12 ++ datafusion/sqllogictest/test_files/map.slt | 14 +- 10 files changed, 131 insertions(+), 184 deletions(-) create mode 100644 datafusion/functions-array/benches/map.rs diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index cdf0e7f573163..61d9c72b89d99 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1331,6 +1331,7 @@ dependencies = [ "itertools", "log", "paste", + "rand", ] [[package]] @@ -3593,18 +3594,18 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.62" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.62" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 009f3512c588e..415af1bf94dce 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -173,6 +173,13 @@ pub trait ExprPlanner: Send + Sync { fn plan_overlay(&self, args: Vec) -> Result>> { Ok(PlannerResult::Original(args)) } + + /// Plan a make_map expression, e.g., `make_map(key1, value1, key2, value2, ...)` + /// + /// Returns origin expression arguments if not possible + fn plan_make_map(&self, args: Vec) -> Result>> { + Ok(PlannerResult::Original(args)) + } } /// An operator with two arguments to plan diff --git a/datafusion/functions-array/Cargo.toml b/datafusion/functions-array/Cargo.toml index 73c5b9114a2c6..de424b259694c 100644 --- a/datafusion/functions-array/Cargo.toml +++ b/datafusion/functions-array/Cargo.toml @@ -53,6 +53,7 @@ datafusion-functions-aggregate = { workspace = true } itertools = { version = "0.12", features = ["use_std"] } log = { workspace = true } paste = "1.0.14" +rand = "0.8.5" [dev-dependencies] criterion = { version = "0.5", features = ["async_tokio"] } @@ -60,3 +61,7 @@ criterion = { version = "0.5", features = ["async_tokio"] } [[bench]] harness = false name = "array_expression" + +[[bench]] +harness = false +name = "map" diff --git 
a/datafusion/functions-array/benches/map.rs b/datafusion/functions-array/benches/map.rs new file mode 100644 index 0000000000000..2e9b45266abc6 --- /dev/null +++ b/datafusion/functions-array/benches/map.rs @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::prelude::ThreadRng; +use rand::Rng; + +use datafusion_common::ScalarValue; +use datafusion_expr::planner::ExprPlanner; +use datafusion_expr::Expr; +use datafusion_functions_array::planner::ArrayFunctionPlanner; + +fn keys(rng: &mut ThreadRng) -> Vec { + let mut keys = vec![]; + for _ in 0..1000 { + keys.push(rng.gen_range(0..9999).to_string()); + } + keys +} + +fn values(rng: &mut ThreadRng) -> Vec { + let mut values = vec![]; + for _ in 0..1000 { + values.push(rng.gen_range(0..9999)); + } + values +} + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("make_map_1000", |b| { + let mut rng = rand::thread_rng(); + let keys = keys(&mut rng); + let values = values(&mut rng); + let mut buffer = Vec::new(); + for i in 0..1000 { + buffer.push(Expr::Literal(ScalarValue::Utf8(Some(keys[i].clone())))); + buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i])))); + } + + let planner = ArrayFunctionPlanner {}; + + b.iter(|| { + black_box( + planner + .plan_make_map(buffer.clone()) + .expect("map should work on valid values"), + ); + }); + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions-array/src/planner.rs b/datafusion/functions-array/src/planner.rs index dfb620f84f3a9..fbb541d9b151e 100644 --- a/datafusion/functions-array/src/planner.rs +++ b/datafusion/functions-array/src/planner.rs @@ -17,7 +17,8 @@ //! 
SQL planning extensions like [`ArrayFunctionPlanner`] and [`FieldAccessPlanner`] -use datafusion_common::{utils::list_ndims, DFSchema, Result}; +use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result}; +use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ expr::AggregateFunctionDefinition, planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, @@ -98,6 +99,24 @@ impl ExprPlanner for ArrayFunctionPlanner { ) -> Result>> { Ok(PlannerResult::Planned(make_array(exprs))) } + + fn plan_make_map(&self, args: Vec) -> Result>> { + if args.len() % 2 != 0 { + return exec_err!("make_map requires an even number of arguments"); + } + + let (keys, values): (Vec<_>, Vec<_>) = + args.into_iter().enumerate().partition(|(i, _)| i % 2 == 0); + let keys = make_array(keys.into_iter().map(|(_, e)| e).collect()); + let values = make_array(values.into_iter().map(|(_, e)| e).collect()); + + Ok(PlannerResult::Planned(Expr::ScalarFunction( + ScalarFunction::new_udf( + datafusion_functions::core::map(), + vec![keys, values], + ), + ))) + } } pub struct FieldAccessPlanner; diff --git a/datafusion/functions/benches/map.rs b/datafusion/functions/benches/map.rs index cd863d0e33114..811c21a41b46d 100644 --- a/datafusion/functions/benches/map.rs +++ b/datafusion/functions/benches/map.rs @@ -23,7 +23,7 @@ use arrow_buffer::{OffsetBuffer, ScalarBuffer}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; -use datafusion_functions::core::{make_map, map}; +use datafusion_functions::core::map; use rand::prelude::ThreadRng; use rand::Rng; use std::sync::Arc; @@ -45,27 +45,6 @@ fn values(rng: &mut ThreadRng) -> Vec { } fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("make_map_1000", |b| { - let mut rng = rand::thread_rng(); - let keys = keys(&mut rng); - let values = values(&mut rng); - let mut buffer = Vec::new(); - for i in 0..1000 { - buffer.push(ColumnarValue::Scalar(ScalarValue::Utf8(Some( - keys[i].clone(), - )))); - buffer.push(ColumnarValue::Scalar(ScalarValue::Int32(Some(values[i])))); - } - - b.iter(|| { - black_box( - make_map() - .invoke(&buffer) - .expect("map should work on valid values"), - ); - }); - }); - c.bench_function("map_1000", |b| { let mut rng = rand::thread_rng(); let field = Arc::new(Field::new("item", DataType::Utf8, true)); diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions/src/core/map.rs index 1834c7ac6060f..2deef242f8a02 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions/src/core/map.rs @@ -20,12 +20,11 @@ use std::collections::VecDeque; use std::sync::Arc; use arrow::array::{Array, ArrayData, ArrayRef, MapArray, StructArray}; -use arrow::compute::concat; use arrow::datatypes::{DataType, Field, SchemaBuilder}; use arrow_buffer::{Buffer, ToByteSlice}; -use datafusion_common::{exec_err, internal_err, ScalarValue}; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::Result; +use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; /// Check if we can evaluate the expr to constant directly. 
@@ -40,41 +39,6 @@ fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { .all(|arg| matches!(arg, ColumnarValue::Scalar(_))) } -fn make_map(args: &[ColumnarValue]) -> Result { - let can_evaluate_to_const = can_evaluate_to_const(args); - - let (key, value): (Vec<_>, Vec<_>) = args - .chunks_exact(2) - .map(|chunk| { - if let ColumnarValue::Array(_) = chunk[0] { - return not_impl_err!("make_map does not support array keys"); - } - if let ColumnarValue::Array(_) = chunk[1] { - return not_impl_err!("make_map does not support array values"); - } - Ok((chunk[0].clone(), chunk[1].clone())) - }) - .collect::>>()? - .into_iter() - .unzip(); - - let keys = ColumnarValue::values_to_arrays(&key)?; - let values = ColumnarValue::values_to_arrays(&value)?; - - let keys: Vec<_> = keys.iter().map(|k| k.as_ref()).collect(); - let values: Vec<_> = values.iter().map(|v| v.as_ref()).collect(); - - let key = match concat(&keys) { - Ok(key) => key, - Err(e) => return internal_err!("Error concatenating keys: {}", e), - }; - let value = match concat(&values) { - Ok(value) => value, - Err(e) => return internal_err!("Error concatenating values: {}", e), - }; - make_map_batch_internal(key, value, can_evaluate_to_const) -} - fn make_map_batch(args: &[ColumnarValue]) -> Result { if args.len() != 2 { return exec_err!( @@ -154,115 +118,6 @@ fn make_map_batch_internal( }) } -#[derive(Debug)] -pub struct MakeMap { - signature: Signature, -} - -impl Default for MakeMap { - fn default() -> Self { - Self::new() - } -} - -impl MakeMap { - pub fn new() -> Self { - Self { - signature: Signature::user_defined(Volatility::Immutable), - } - } -} - -impl ScalarUDFImpl for MakeMap { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "make_map" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - if arg_types.is_empty() { - return exec_err!( - "make_map requires at least one pair of arguments, got 0 instead" - ); - } - if arg_types.len() % 2 != 0 { - return exec_err!( - "make_map requires an even number of arguments, got {} instead", - arg_types.len() - ); - } - - let key_type = &arg_types[0]; - let mut value_type = &arg_types[1]; - - for (i, chunk) in arg_types.chunks_exact(2).enumerate() { - if chunk[0].is_null() { - return exec_err!("make_map key cannot be null at position {}", i); - } - if &chunk[0] != key_type { - return exec_err!( - "make_map requires all keys to have the same type {}, got {} instead at position {}", - key_type, - chunk[0], - i - ); - } - - if !chunk[1].is_null() { - if value_type.is_null() { - value_type = &chunk[1]; - } else if &chunk[1] != value_type { - return exec_err!( - "map requires all values to have the same type {}, got {} instead at position {}", - value_type, - &chunk[1], - i - ); - } - } - } - - let mut result = Vec::new(); - for _ in 0..arg_types.len() / 2 { - result.push(key_type.clone()); - result.push(value_type.clone()); - } - - Ok(result) - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - let key_type = &arg_types[0]; - let mut value_type = &arg_types[1]; - - for chunk in arg_types.chunks_exact(2) { - if !chunk[1].is_null() && value_type.is_null() { - value_type = &chunk[1]; - } - } - - let mut builder = SchemaBuilder::new(); - builder.push(Field::new("key", key_type.clone(), false)); - builder.push(Field::new("value", value_type.clone(), true)); - let fields = builder.finish().fields; - Ok(DataType::Map( - Arc::new(Field::new("entries", DataType::Struct(fields), 
false)), - false, - )) - } - - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_map(args) - } -} - #[derive(Debug)] pub struct MapFunc { signature: Signature, diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 31bce04beec1b..cbfaa592b012b 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -43,7 +43,6 @@ make_udf_function!(r#struct::StructFunc, STRUCT, r#struct); make_udf_function!(named_struct::NamedStructFunc, NAMED_STRUCT, named_struct); make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field); make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce); -make_udf_function!(map::MakeMap, MAKE_MAP, make_map); make_udf_function!(map::MapFunc, MAP, map); pub mod expr_fn { @@ -81,10 +80,6 @@ pub mod expr_fn { coalesce, "Returns `coalesce(args...)`, which evaluates to the value of the first expr which is not NULL", args, - ),( - make_map, - "Returns a map created from the given keys and values pairs. This function isn't efficient for large maps. Use the `map` function instead.", - args, ),( map, "Returns a map created from a key list and a value list", @@ -107,7 +102,6 @@ pub fn functions() -> Vec> { named_struct(), get_field(), coalesce(), - make_map(), map(), ] } diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index dab328cc49080..4a4b16b804e25 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -21,6 +21,7 @@ use datafusion_common::{ internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; +use datafusion_expr::planner::PlannerResult; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ expr, AggregateFunction, Expr, ExprSchemable, WindowFrame, WindowFunctionDefinition, @@ -227,6 +228,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { crate::utils::normalize_ident(name.0[0].clone()) }; + if name.eq("make_map") { + let mut fn_args = + self.function_args_to_expr(args.clone(), schema, planner_context)?; + for planner in self.context_provider.get_expr_planners().iter() { + match planner.plan_make_map(fn_args)? { + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(args) => fn_args = args, + } + } + } + // user-defined function (UDF) should have precedence if let Some(fm) = self.context_provider.get_function_meta(&name) { let args = self.function_args_to_expr(args, schema, planner_context)?; diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index fb8917a5f4fee..26bfb4a5922e6 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -131,17 +131,23 @@ SELECT MAKE_MAP([1,2], ['a', 'b'], [3,4], ['b']); ---- {[1, 2]: [a, b], [3, 4]: [b]} -query error +query ? SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30); +---- +{POST: 41, HEAD: ab, PATCH: 30} query error SELECT MAKE_MAP('POST', 41, 'HEAD', 33, null, 30); -query error -SELECT MAKE_MAP('POST', 41, 123, 33,'PATCH', 30); +query ? +SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30); +---- +{POST: 41, HEAD: ab, PATCH: 30} -query error +query ? 
SELECT MAKE_MAP() +---- +{} query error SELECT MAKE_MAP('POST', 41, 'HEAD'); From 28fa74bf0fb69f46fd03ef97eb301090de23b5f5 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 19 Jul 2024 06:53:21 -0600 Subject: [PATCH 090/357] feat: Optimize CASE expression for "column or null" use case (#11534) --- datafusion/core/example.parquet | Bin 0 -> 976 bytes datafusion/physical-expr/benches/case_when.rs | 41 ++++- .../physical-expr/src/expressions/case.rs | 161 +++++++++++++++++- datafusion/sqllogictest/README.md | 2 +- datafusion/sqllogictest/test_files/case.slt | 52 ++++++ 5 files changed, 242 insertions(+), 14 deletions(-) create mode 100644 datafusion/core/example.parquet create mode 100644 datafusion/sqllogictest/test_files/case.slt diff --git a/datafusion/core/example.parquet b/datafusion/core/example.parquet new file mode 100644 index 0000000000000000000000000000000000000000..94de10394b33d26a23a9888e88faa1fa90f14043 GIT binary patch literal 976 zcmb7@y-UMD7{=dR+8CijD}7&bkU<0w2k`?GGL%A>;?SWub(6GKRM0|Ob#>@0PW}lF zj)D#j4jufP929)7>E!~g7L&j|d7ryqp4>;XcDRc@6Mv}ynjBo~6V zH`y+thhEt7jT5A*RN}trNWm|{0f9NW4_;9QPK*T-bm!2sqpAu*JJPBsrP-S1{t{1r zL|?PhFl5S2s;{?7@i{f==;+c__5v4R+(_C2+(#~f_ zqL5^4^S5jpnYGQ=*fw%%hg8QQV?c&9a#K0ZCz5+L4hnI<-@7>)bWXb$F?zh7>w(ctf)69oWF1C%Kz8Cp)ViHG+x3gs82To&93&yRUd+}=C`ei=G63r()}_L z-TE5)>SeImRT}5jD9>0kT~Se9KA*kUVhCXR^s>`3E!aTC)HE literal 0 HcmV?d00001 diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 9cc7bdc465fb5..862edd9c1fac3 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -40,6 +40,7 @@ fn criterion_benchmark(c: &mut Criterion) { // create input data let mut c1 = Int32Builder::new(); let mut c2 = StringBuilder::new(); + let mut c3 = StringBuilder::new(); for i in 0..1000 { c1.append_value(i); if i % 7 == 0 { @@ -47,14 +48,21 @@ fn criterion_benchmark(c: &mut Criterion) { } else { c2.append_value(&format!("string {i}")); } + if i % 9 == 0 { + c3.append_null(); + } else { + c3.append_value(&format!("other string {i}")); + } } let c1 = Arc::new(c1.finish()); let c2 = Arc::new(c2.finish()); + let c3 = Arc::new(c3.finish()); let schema = Schema::new(vec![ Field::new("c1", DataType::Int32, true), Field::new("c2", DataType::Utf8, true), + Field::new("c3", DataType::Utf8, true), ]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2]).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2, c3]).unwrap(); // use same predicate for all benchmarks let predicate = Arc::new(BinaryExpr::new( @@ -63,7 +71,7 @@ fn criterion_benchmark(c: &mut Criterion) { make_lit_i32(500), )); - // CASE WHEN expr THEN 1 ELSE 0 END + // CASE WHEN c1 <= 500 THEN 1 ELSE 0 END c.bench_function("case_when: scalar or scalar", |b| { let expr = Arc::new( CaseExpr::try_new( @@ -76,13 +84,38 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) }); - // CASE WHEN expr THEN col ELSE null END + // CASE WHEN c1 <= 500 THEN c2 [ELSE NULL] END c.bench_function("case_when: column or null", |b| { + let expr = Arc::new( + CaseExpr::try_new(None, vec![(predicate.clone(), make_col("c2", 1))], None) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + // CASE WHEN c1 <= 500 THEN c2 ELSE c3 END + c.bench_function("case_when: expr or expr", |b| { let expr = Arc::new( CaseExpr::try_new( None, vec![(predicate.clone(), make_col("c2", 1))], - 
Some(Arc::new(Literal::new(ScalarValue::Utf8(None)))), + Some(make_col("c3", 2)), + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + // CASE c1 WHEN 1 THEN c2 WHEN 2 THEN c3 END + c.bench_function("case_when: CASE expr", |b| { + let expr = Arc::new( + CaseExpr::try_new( + Some(make_col("c1", 0)), + vec![ + (make_lit_i32(1), make_col("c2", 1)), + (make_lit_i32(2), make_col("c3", 2)), + ], + None, ) .unwrap(), ); diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 7a434c9402292..521a7ed9acae4 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -32,10 +32,33 @@ use datafusion_common::cast::as_boolean_array; use datafusion_common::{exec_err, internal_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::ColumnarValue; +use datafusion_physical_expr_common::expressions::column::Column; +use datafusion_physical_expr_common::expressions::Literal; use itertools::Itertools; type WhenThen = (Arc, Arc); +#[derive(Debug, Hash)] +enum EvalMethod { + /// CASE WHEN condition THEN result + /// [WHEN ...] + /// [ELSE result] + /// END + NoExpression, + /// CASE expression + /// WHEN value THEN result + /// [WHEN ...] + /// [ELSE result] + /// END + WithExpression, + /// This is a specialization for a specific use case where we can take a fast path + /// for expressions that are infallible and can be cheaply computed for the entire + /// record batch rather than just for the rows where the predicate is true. + /// + /// CASE WHEN condition THEN column [ELSE NULL] END + InfallibleExprOrNull, +} + /// The CASE expression is similar to a series of nested if/else and there are two forms that /// can be used. The first form consists of a series of boolean "when" expressions with /// corresponding "then" expressions, and an optional "else" expression. @@ -61,6 +84,8 @@ pub struct CaseExpr { when_then_expr: Vec, /// Optional "else" expression else_expr: Option>, + /// Evaluation method to use + eval_method: EvalMethod, } impl std::fmt::Display for CaseExpr { @@ -79,6 +104,15 @@ impl std::fmt::Display for CaseExpr { } } +/// This is a specialization for a specific use case where we can take a fast path +/// for expressions that are infallible and can be cheaply computed for the entire +/// record batch rather than just for the rows where the predicate is true. 
For now, +/// this is limited to use with Column expressions but could potentially be used for other +/// expressions in the future +fn is_cheap_and_infallible(expr: &Arc) -> bool { + expr.as_any().is::() +} + impl CaseExpr { /// Create a new CASE WHEN expression pub fn try_new( @@ -86,13 +120,35 @@ impl CaseExpr { when_then_expr: Vec, else_expr: Option>, ) -> Result { + // normalize null literals to None in the else_expr (this already happens + // during SQL planning, but not necessarily for other use cases) + let else_expr = match &else_expr { + Some(e) => match e.as_any().downcast_ref::() { + Some(lit) if lit.value().is_null() => None, + _ => else_expr, + }, + _ => else_expr, + }; + if when_then_expr.is_empty() { exec_err!("There must be at least one WHEN clause") } else { + let eval_method = if expr.is_some() { + EvalMethod::WithExpression + } else if when_then_expr.len() == 1 + && is_cheap_and_infallible(&(when_then_expr[0].1)) + && else_expr.is_none() + { + EvalMethod::InfallibleExprOrNull + } else { + EvalMethod::NoExpression + }; + Ok(Self { expr, when_then_expr, else_expr, + eval_method, }) } } @@ -256,6 +312,38 @@ impl CaseExpr { Ok(ColumnarValue::Array(current_value)) } + + /// This function evaluates the specialized case of: + /// + /// CASE WHEN condition THEN column + /// [ELSE NULL] + /// END + /// + /// Note that this function is only safe to use for "then" expressions + /// that are infallible because the expression will be evaluated for all + /// rows in the input batch. + fn case_column_or_null(&self, batch: &RecordBatch) -> Result { + let when_expr = &self.when_then_expr[0].0; + let then_expr = &self.when_then_expr[0].1; + if let ColumnarValue::Array(bit_mask) = when_expr.evaluate(batch)? { + let bit_mask = bit_mask + .as_any() + .downcast_ref::() + .expect("predicate should evaluate to a boolean array"); + // invert the bitmask + let bit_mask = not(bit_mask)?; + match then_expr.evaluate(batch)? 
{ + ColumnarValue::Array(array) => { + Ok(ColumnarValue::Array(nullif(&array, &bit_mask)?)) + } + ColumnarValue::Scalar(_) => { + internal_err!("expression did not evaluate to an array") + } + } + } else { + internal_err!("predicate did not evaluate to an array") + } + } } impl PhysicalExpr for CaseExpr { @@ -303,14 +391,21 @@ impl PhysicalExpr for CaseExpr { } fn evaluate(&self, batch: &RecordBatch) -> Result { - if self.expr.is_some() { - // this use case evaluates "expr" and then compares the values with the "when" - // values - self.case_when_with_expr(batch) - } else { - // The "when" conditions all evaluate to boolean in this use case and can be - // arbitrary expressions - self.case_when_no_expr(batch) + match self.eval_method { + EvalMethod::WithExpression => { + // this use case evaluates "expr" and then compares the values with the "when" + // values + self.case_when_with_expr(batch) + } + EvalMethod::NoExpression => { + // The "when" conditions all evaluate to boolean in this use case and can be + // arbitrary expressions + self.case_when_no_expr(batch) + } + EvalMethod::InfallibleExprOrNull => { + // Specialization for CASE WHEN expr THEN column [ELSE NULL] END + self.case_column_or_null(batch) + } } } @@ -409,7 +504,7 @@ pub fn case( #[cfg(test)] mod tests { use super::*; - use crate::expressions::{binary, cast, col, lit}; + use crate::expressions::{binary, cast, col, lit, BinaryExpr}; use arrow::buffer::Buffer; use arrow::datatypes::DataType::Float64; @@ -419,6 +514,7 @@ mod tests { use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_expr::type_coercion::binary::comparison_coercion; use datafusion_expr::Operator; + use datafusion_physical_expr_common::expressions::Literal; #[test] fn case_with_expr() -> Result<()> { @@ -998,6 +1094,53 @@ mod tests { Ok(()) } + #[test] + fn test_column_or_null_specialization() -> Result<()> { + // create input data + let mut c1 = Int32Builder::new(); + let mut c2 = StringBuilder::new(); + for i in 0..1000 { + c1.append_value(i); + if i % 7 == 0 { + c2.append_null(); + } else { + c2.append_value(&format!("string {i}")); + } + } + let c1 = Arc::new(c1.finish()); + let c2 = Arc::new(c2.finish()); + let schema = Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Utf8, true), + ]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2]).unwrap(); + + // CaseWhenExprOrNull should produce same results as CaseExpr + let predicate = Arc::new(BinaryExpr::new( + make_col("c1", 0), + Operator::LtEq, + make_lit_i32(250), + )); + let expr = CaseExpr::try_new(None, vec![(predicate, make_col("c2", 1))], None)?; + assert!(matches!(expr.eval_method, EvalMethod::InfallibleExprOrNull)); + match expr.evaluate(&batch)? 
{ + ColumnarValue::Array(array) => { + assert_eq!(1000, array.len()); + assert_eq!(785, array.null_count()); + } + _ => unreachable!(), + } + Ok(()) + } + + fn make_col(name: &str, index: usize) -> Arc { + Arc::new(Column::new(name, index)) + } + + fn make_lit_i32(n: i32) -> Arc { + Arc::new(Literal::new(ScalarValue::Int32(Some(n)))) + } + fn generate_case_when_with_type_coercion( expr: Option>, when_thens: Vec, diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index c7f04c0d762c1..5becc75c985a4 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -133,7 +133,7 @@ In order to run the sqllogictests running against a previously running Postgres PG_COMPAT=true PG_URI="postgresql://postgres@127.0.0.1/postgres" cargo test --features=postgres --test sqllogictests ``` -The environemnt variables: +The environment variables: 1. `PG_COMPAT` instructs sqllogictest to run against Postgres (not DataFusion) 2. `PG_URI` contains a `libpq` style connection string, whose format is described in diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt new file mode 100644 index 0000000000000..fac1042bb6dd7 --- /dev/null +++ b/datafusion/sqllogictest/test_files/case.slt @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# create test data +statement ok +create table foo (a int, b int) as values (1, 2), (3, 4), (5, 6); + +# CASE WHEN with condition +query T +SELECT CASE a WHEN 1 THEN 'one' WHEN 3 THEN 'three' ELSE '?' END FROM foo +---- +one +three +? 
+ +# CASE WHEN with no condition +query I +SELECT CASE WHEN a > 2 THEN a ELSE b END FROM foo +---- +2 +3 +5 + +# column or explicit null +query I +SELECT CASE WHEN a > 2 THEN b ELSE null END FROM foo +---- +NULL +4 +6 + +# column or implicit null +query I +SELECT CASE WHEN a > 2 THEN b END FROM foo +---- +NULL +4 +6 From cf9da768306a1e103bfeae68f4f2ed3dfe87df7b Mon Sep 17 00:00:00 2001 From: JasonLi Date: Fri, 19 Jul 2024 21:56:14 +0800 Subject: [PATCH 091/357] fix: typos of sql, sqllogictest and substrait packages (#11548) --- datafusion/sql/src/expr/function.rs | 2 +- datafusion/sql/src/parser.rs | 4 ++-- datafusion/sql/src/relation/mod.rs | 2 +- datafusion/sql/src/select.rs | 2 +- datafusion/sql/src/unparser/ast.rs | 2 +- datafusion/sql/src/unparser/plan.rs | 6 +++--- datafusion/sql/src/unparser/utils.rs | 4 ++-- datafusion/sql/src/utils.rs | 2 +- datafusion/sql/tests/common/mod.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 4 ++-- datafusion/sqllogictest/test_files/aggregate.slt | 8 ++++---- datafusion/sqllogictest/test_files/array.slt | 4 ++-- datafusion/sqllogictest/test_files/binary.slt | 2 +- datafusion/sqllogictest/test_files/copy.slt | 2 +- datafusion/sqllogictest/test_files/explain.slt | 2 +- datafusion/sqllogictest/test_files/interval.slt | 2 +- datafusion/sqllogictest/test_files/math.slt | 2 +- datafusion/sqllogictest/test_files/options.slt | 2 +- datafusion/sqllogictest/test_files/scalar.slt | 2 +- datafusion/sqllogictest/test_files/timestamps.slt | 2 +- datafusion/sqllogictest/test_files/unnest.slt | 4 ++-- datafusion/sqllogictest/test_files/update.slt | 2 +- datafusion/sqllogictest/test_files/window.slt | 2 +- datafusion/substrait/src/logical_plan/producer.rs | 2 +- 24 files changed, 34 insertions(+), 34 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 4a4b16b804e25..4804752d8389f 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -67,7 +67,7 @@ pub fn suggest_valid_function( find_closest_match(valid_funcs, input_function_name) } -/// Find the closest matching string to the target string in the candidates list, using edit distance(case insensitve) +/// Find the closest matching string to the target string in the candidates list, using edit distance(case insensitive) /// Input `candidates` must not be empty otherwise it will panic fn find_closest_match(candidates: Vec, target: &str) -> String { let target = target.to_lowercase(); diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index bc13484235c39..a743aa72829de 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -218,7 +218,7 @@ impl fmt::Display for CreateExternalTable { /// /// This can either be a [`Statement`] from [`sqlparser`] from a /// standard SQL dialect, or a DataFusion extension such as `CREATE -/// EXTERAL TABLE`. See [`DFParser`] for more information. +/// EXTERNAL TABLE`. See [`DFParser`] for more information. 
/// /// [`Statement`]: sqlparser::ast::Statement #[derive(Debug, Clone, PartialEq, Eq)] @@ -1101,7 +1101,7 @@ mod tests { }); expect_parse_ok(sql, expected)?; - // positive case: column definiton allowed in 'partition by' clause + // positive case: column definition allowed in 'partition by' clause let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; let expected = Statement::CreateExternalTable(CreateExternalTable { diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 9380e569f2e43..b812dae5ae3e3 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -105,7 +105,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Unnest table factor has empty input let schema = DFSchema::empty(); let input = LogicalPlanBuilder::empty(true).build()?; - // Unnest table factor can have multiple arugments. + // Unnest table factor can have multiple arguments. // We treat each argument as a separate unnest expression. let unnest_exprs = array_exprs .into_iter() diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 84b80c311245c..fc46c3a841b53 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -306,7 +306,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut intermediate_select_exprs = select_exprs; // Each expr in select_exprs can contains multiple unnest stage // The transformation happen bottom up, one at a time for each iteration - // Ony exaust the loop if no more unnest transformation is found + // Only exaust the loop if no more unnest transformation is found for i in 0.. { let mut unnest_columns = vec![]; // from which column used for projection, before the unnest happen diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 06b4d4a710a31..02eb44dbb657d 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -497,7 +497,7 @@ impl Default for DerivedRelationBuilder { pub(super) struct UninitializedFieldError(&'static str); impl UninitializedFieldError { - /// Create a new `UnitializedFieldError` for the specified field name. + /// Create a new `UninitializedFieldError` for the specified field name. 
pub fn new(field_name: &'static str) -> Self { UninitializedFieldError(field_name) } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 7a653f80be08b..26fd472996376 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -214,12 +214,12 @@ impl Unparser<'_> { } else { let mut derived_builder = DerivedRelationBuilder::default(); derived_builder.lateral(false).alias(None).subquery({ - let inner_statment = self.plan_to_sql(plan)?; - if let ast::Statement::Query(inner_query) = inner_statment { + let inner_statement = self.plan_to_sql(plan)?; + if let ast::Statement::Query(inner_query) = inner_statement { inner_query } else { return internal_err!( - "Subquery must be a Query, but found {inner_statment:?}" + "Subquery must be a Query, but found {inner_statement:?}" ); } }); diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 331da9773f161..71f64f1cf459e 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -31,7 +31,7 @@ pub(crate) enum AggVariant<'a> { /// Recursively searches children of [LogicalPlan] to find an Aggregate or window node if one exists /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor). /// If an Aggregate or window node is not found prior to this or at all before reaching the end -/// of the tree, None is returned. It is assumed that a Window and Aggegate node cannot both +/// of the tree, None is returned. It is assumed that a Window and Aggregate node cannot both /// be found in a single select query. pub(crate) fn find_agg_node_within_select<'a>( plan: &'a LogicalPlan, @@ -82,7 +82,7 @@ pub(crate) fn unproject_agg_exprs(expr: &Expr, agg: &Aggregate) -> Result expr.clone() .transform(|sub_expr| { if let Expr::Column(c) = sub_expr { - // find the column in the agg schmea + // find the column in the agg schema if let Ok(n) = agg.schema.index_of_column(&c) { let unprojected_expr = agg .group_expr diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 2eacbd174fc24..a70e3e9be9303 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -325,7 +325,7 @@ pub(crate) fn transform_bottom_unnest( let (data_type, _) = arg.data_type_and_nullable(input.schema())?; if let DataType::Struct(_) = data_type { - return internal_err!("unnest on struct can ony be applied at the root level of select expression"); + return internal_err!("unnest on struct can only be applied at the root level of select expression"); } let mut transformed_exprs = transform(&expr, arg)?; diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index b8d8bd12d28bb..bcfb8f43848e4 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -56,7 +56,7 @@ pub(crate) struct MockContextProvider { } impl MockContextProvider { - // Surpressing dead code warning, as this is used in integration test crates + // Suppressing dead code warning, as this is used in integration test crates #[allow(dead_code)] pub(crate) fn options_mut(&mut self) -> &mut ConfigOptions { &mut self.options diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index e34e7e20a0f32..57dab81331b3f 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1144,7 +1144,7 @@ fn select_aggregate_with_group_by_with_having_that_reuses_aggregate_multiple_tim } #[test] -fn 
select_aggregate_with_group_by_with_having_using_aggreagate_not_in_select() { +fn select_aggregate_with_group_by_with_having_using_aggregate_not_in_select() { let sql = "SELECT first_name, MAX(age) FROM person GROUP BY first_name @@ -1185,7 +1185,7 @@ fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compo } #[test] -fn select_aggregate_with_group_by_with_having_using_derived_column_aggreagate_not_in_select( +fn select_aggregate_with_group_by_with_having_using_derived_column_aggregate_not_in_select( ) { let sql = "SELECT first_name, MAX(age) FROM person diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 1976951b8ce66..d0f7f2d9ac7aa 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3705,7 +3705,7 @@ select column3 as tag from t_source; -# Demonstate the contents +# Demonstrate the contents query PPPPPPPPTT select * from t; ---- @@ -3816,7 +3816,7 @@ select column3 as tag from t_source; -# Demonstate the contents +# Demonstrate the contents query DDTT select * from t; ---- @@ -3914,7 +3914,7 @@ select column3 as tag from t_source; -# Demonstate the contents +# Demonstrate the contents query DDDDTT select * from t; ---- @@ -4108,7 +4108,7 @@ select sum(c1), arrow_typeof(sum(c1)) from d_table; ---- 100 Decimal128(20, 3) -# aggregate sum with deciaml +# aggregate sum with decimal statement ok create table t (c decimal(35, 3)) as values (10), (null), (20); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 7917f1d78da8e..f2972e4c14c26 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5690,7 +5690,7 @@ select ---- [] [] [0] [0] -# Test range for other egde cases +# Test range for other edge cases query ???????? select range(9223372036854775807, 9223372036854775807, -1) as c1, @@ -5828,7 +5828,7 @@ select [-9223372036854775808] [9223372036854775807] [0, -9223372036854775808] [0, 9223372036854775807] -# Test generate_series for other egde cases +# Test generate_series for other edge cases query ???? 
select generate_series(9223372036854775807, 9223372036854775807, -1) as c1, diff --git a/datafusion/sqllogictest/test_files/binary.slt b/datafusion/sqllogictest/test_files/binary.slt index 621cd3e528f17..5c5f9d510e554 100644 --- a/datafusion/sqllogictest/test_files/binary.slt +++ b/datafusion/sqllogictest/test_files/binary.slt @@ -25,7 +25,7 @@ SELECT X'FF01', arrow_typeof(X'FF01'); ---- ff01 Binary -# Invaid hex values +# Invalid hex values query error DataFusion error: Error during planning: Invalid HexStringLiteral 'Z' SELECT X'Z' diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 6a6ab15a065d3..7af4c52c654b6 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -271,7 +271,7 @@ select * from validate_struct_with_array; {c0: foo, c1: [1, 2, 3], c2: {c0: bar, c1: [2, 3, 4]}} -# Copy parquet with all supported statment overrides +# Copy parquet with all supported statement overrides query IT COPY source_table TO 'test_files/scratch/copy/table_with_options/' diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 3a4e8072bbc76..172cbad44dca4 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -394,7 +394,7 @@ physical_plan_with_schema statement ok set datafusion.execution.collect_statistics = false; -# Explain ArrayFuncions +# Explain ArrayFunctions statement ok set datafusion.explain.physical_plan_only = false diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index eab4eed002696..afb262cf95a55 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -325,7 +325,7 @@ select ---- Interval(MonthDayNano) Interval(MonthDayNano) -# cast with explicit cast sytax +# cast with explicit cast syntax query TT select arrow_typeof(cast ('5 months' as interval)), diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 6ff804c3065d9..6884d762612d8 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -112,7 +112,7 @@ SELECT iszero(1.0), iszero(0.0), iszero(-0.0), iszero(NULL) ---- false true true NULL -# abs: empty argumnet +# abs: empty argument statement error SELECT abs(); diff --git a/datafusion/sqllogictest/test_files/options.slt b/datafusion/sqllogictest/test_files/options.slt index ba9eedcbbd34b..aafaa054964e1 100644 --- a/datafusion/sqllogictest/test_files/options.slt +++ b/datafusion/sqllogictest/test_files/options.slt @@ -42,7 +42,7 @@ physical_plan statement ok set datafusion.execution.coalesce_batches = false -# expect no coalsece +# expect no coalescence query TT explain SELECT * FROM a WHERE c0 < 1; ---- diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 5daa9333fb36f..dd19a1344139d 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1203,7 +1203,7 @@ FROM t1 999 999 -# case_when_else_with_null_contant() +# case_when_else_with_null_constant() query I SELECT CASE WHEN c1 = 'a' THEN 1 diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index f4e492649b9f8..2ca2d49997a66 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ 
b/datafusion/sqllogictest/test_files/timestamps.slt @@ -1161,7 +1161,7 @@ ts_data_secs 2020-09-08T00:00:00 ts_data_secs 2020-09-08T00:00:00 ts_data_secs 2020-09-08T00:00:00 -# Test date trun on different granularity +# Test date turn on different granularity query TP rowsort SELECT 'millisecond', DATE_TRUNC('millisecond', ts) FROM ts_data_nanos UNION ALL diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 93146541e107b..d818c0e927953 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -484,7 +484,7 @@ query error DataFusion error: type_coercion\ncaused by\nThis feature is not impl select sum(unnest(generate_series(1,10))); ## TODO: support unnest as a child expr -query error DataFusion error: Internal error: unnest on struct can ony be applied at the root level of select expression +query error DataFusion error: Internal error: unnest on struct can only be applied at the root level of select expression select arrow_typeof(unnest(column5)) from unnest_table; @@ -517,7 +517,7 @@ select unnest(unnest(unnest(column3)['c1'])), column3 from recursive_unnest_tabl 3 [{c0: [2], c1: [[3], [4]]}] 4 [{c0: [2], c1: [[3], [4]]}] -## tripple list unnest +## triple list unnest query I? select unnest(unnest(unnest(column2))), column2 from recursive_unnest_table; ---- diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index 49b2bd9aa0b56..3d455d7a88ca7 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -74,7 +74,7 @@ logical_plan statement ok create table t3(a int, b varchar, c double, d int); -# set from mutiple tables, sqlparser only supports from one table +# set from multiple tables, sqlparser only supports from one table query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: ,"\) explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 5296f13de08a5..37214e11eae80 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3968,7 +3968,7 @@ CREATE TABLE table_with_pk ( # However, if we know that contains a unique column (e.g. a PRIMARY KEY), # it can be treated as `OVER (ORDER BY ROWS BETWEEN UNBOUNDED PRECEDING # AND CURRENT ROW)` where window frame units change from `RANGE` to `ROWS`. This -# conversion makes the window frame manifestly causal by eliminating the possiblity +# conversion makes the window frame manifestly causal by eliminating the possibility # of ties explicitly (see window frame documentation for a discussion of causality # in this context). The Query below should have `ROWS` in its window frame. query TT diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 7849d0bd431e6..0fd59d5280862 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -994,7 +994,7 @@ pub fn make_binary_op_scalar_func( /// /// * `expr` - DataFusion expression to be parse into a Substrait expression /// * `schema` - DataFusion input schema for looking up field qualifiers -/// * `col_ref_offset` - Offset for caculating Substrait field reference indices. 
+/// * `col_ref_offset` - Offset for calculating Substrait field reference indices. /// This should only be set by caller with more than one input relations i.e. Join. /// Substrait expects one set of indices when joining two relations. /// Let's say `left` and `right` have `m` and `n` columns, respectively. The `right` From a4c9bb45744bfdfa714cb1a7a234e89608196169 Mon Sep 17 00:00:00 2001 From: Arttu Date: Fri, 19 Jul 2024 21:38:04 +0200 Subject: [PATCH 092/357] feat: consume and produce Substrait type extensions (#11510) * support reading type extensions in consumer * read extension for UDTs * support also type extensions in producer * produce extensions for MonthDayNano UDT * unify extensions between consumer and producer * fixes * add doc comments * add extension tests * fix * fix docs * fix test * fix clipppy --- datafusion/substrait/src/extensions.rs | 157 ++++++ datafusion/substrait/src/lib.rs | 1 + .../substrait/src/logical_plan/consumer.rs | 269 +++++---- .../substrait/src/logical_plan/producer.rs | 524 +++++++++--------- datafusion/substrait/src/variation_const.rs | 24 +- .../tests/cases/roundtrip_logical_plan.rs | 99 ++-- 6 files changed, 644 insertions(+), 430 deletions(-) create mode 100644 datafusion/substrait/src/extensions.rs diff --git a/datafusion/substrait/src/extensions.rs b/datafusion/substrait/src/extensions.rs new file mode 100644 index 0000000000000..459d0e0c5ae58 --- /dev/null +++ b/datafusion/substrait/src/extensions.rs @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::common::{plan_err, DataFusionError}; +use std::collections::HashMap; +use substrait::proto::extensions::simple_extension_declaration::{ + ExtensionFunction, ExtensionType, ExtensionTypeVariation, MappingType, +}; +use substrait::proto::extensions::SimpleExtensionDeclaration; + +/// Substrait uses [SimpleExtensions](https://substrait.io/extensions/#simple-extensions) to define +/// behavior of plans in addition to what's supported directly by the protobuf definitions. +/// That includes functions, but also provides support for custom types and variations for existing +/// types. This structs facilitates the use of these extensions in DataFusion. +/// TODO: DF doesn't yet use extensions for type variations +/// TODO: DF doesn't yet provide valid extensionUris +#[derive(Default, Debug, PartialEq)] +pub struct Extensions { + pub functions: HashMap, // anchor -> function name + pub types: HashMap, // anchor -> type name + pub type_variations: HashMap, // anchor -> type variation name +} + +impl Extensions { + /// Registers a function and returns the anchor (reference) to it. If the function has already + /// been registered, it returns the existing anchor. 
+ /// Function names are case-insensitive (converted to lowercase). + pub fn register_function(&mut self, function_name: String) -> u32 { + let function_name = function_name.to_lowercase(); + + // Some functions are named differently in Substrait default extensions than in DF + // Rename those to match the Substrait extensions for interoperability + let function_name = match function_name.as_str() { + "substr" => "substring".to_string(), + _ => function_name, + }; + + match self.functions.iter().find(|(_, f)| *f == &function_name) { + Some((function_anchor, _)) => *function_anchor, // Function has been registered + None => { + // Function has NOT been registered + let function_anchor = self.functions.len() as u32; + self.functions + .insert(function_anchor, function_name.clone()); + function_anchor + } + } + } + + /// Registers a type and returns the anchor (reference) to it. If the type has already + /// been registered, it returns the existing anchor. + pub fn register_type(&mut self, type_name: String) -> u32 { + let type_name = type_name.to_lowercase(); + match self.types.iter().find(|(_, t)| *t == &type_name) { + Some((type_anchor, _)) => *type_anchor, // Type has been registered + None => { + // Type has NOT been registered + let type_anchor = self.types.len() as u32; + self.types.insert(type_anchor, type_name.clone()); + type_anchor + } + } + } +} + +impl TryFrom<&Vec> for Extensions { + type Error = DataFusionError; + + fn try_from( + value: &Vec, + ) -> datafusion::common::Result { + let mut functions = HashMap::new(); + let mut types = HashMap::new(); + let mut type_variations = HashMap::new(); + + for ext in value { + match &ext.mapping_type { + Some(MappingType::ExtensionFunction(ext_f)) => { + functions.insert(ext_f.function_anchor, ext_f.name.to_owned()); + } + Some(MappingType::ExtensionType(ext_t)) => { + types.insert(ext_t.type_anchor, ext_t.name.to_owned()); + } + Some(MappingType::ExtensionTypeVariation(ext_v)) => { + type_variations + .insert(ext_v.type_variation_anchor, ext_v.name.to_owned()); + } + None => return plan_err!("Cannot parse empty extension"), + } + } + + Ok(Extensions { + functions, + types, + type_variations, + }) + } +} + +impl From for Vec { + fn from(val: Extensions) -> Vec { + let mut extensions = vec![]; + for (f_anchor, f_name) in val.functions { + let function_extension = ExtensionFunction { + extension_uri_reference: u32::MAX, + function_anchor: f_anchor, + name: f_name, + }; + let simple_extension = SimpleExtensionDeclaration { + mapping_type: Some(MappingType::ExtensionFunction(function_extension)), + }; + extensions.push(simple_extension); + } + + for (t_anchor, t_name) in val.types { + let type_extension = ExtensionType { + extension_uri_reference: u32::MAX, // https://github.com/apache/datafusion/issues/11545 + type_anchor: t_anchor, + name: t_name, + }; + let simple_extension = SimpleExtensionDeclaration { + mapping_type: Some(MappingType::ExtensionType(type_extension)), + }; + extensions.push(simple_extension); + } + + for (tv_anchor, tv_name) in val.type_variations { + let type_variation_extension = ExtensionTypeVariation { + extension_uri_reference: u32::MAX, // We don't register proper extension URIs yet + type_variation_anchor: tv_anchor, + name: tv_name, + }; + let simple_extension = SimpleExtensionDeclaration { + mapping_type: Some(MappingType::ExtensionTypeVariation( + type_variation_extension, + )), + }; + extensions.push(simple_extension); + } + + extensions + } +} diff --git a/datafusion/substrait/src/lib.rs 
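A minimal usage sketch of the `Extensions` registry added above, assuming the crate is consumed as the external `datafusion_substrait` dependency; the function names, the "some_custom_type" type name and the anchor values in the comments are illustrative only and follow from the registration order implemented by `register_function`/`register_type`:

use datafusion_substrait::extensions::Extensions;
use substrait::proto::extensions::SimpleExtensionDeclaration;

fn extensions_registry_sketch() -> datafusion::common::Result<()> {
    let mut extensions = Extensions::default();

    // Anchors are plan-relative and handed out in registration order, starting at 0.
    // Registration is case-insensitive, and "substr" is stored as "substring".
    let sum_anchor = extensions.register_function("SUM".to_string());        // 0
    let substr_anchor = extensions.register_function("substr".to_string());  // 1
    assert_eq!(sum_anchor, extensions.register_function("sum".to_string())); // still 0

    // Custom types get their own anchor space (hypothetical type name).
    let type_anchor = extensions.register_type("some_custom_type".to_string()); // 0

    // The producer turns the registry into the plan's extension declarations ...
    let declarations: Vec<SimpleExtensionDeclaration> = extensions.into();

    // ... and the consumer rebuilds the same anchor -> name mapping from them.
    let roundtrip = Extensions::try_from(&declarations)?;
    assert_eq!(roundtrip.functions.get(&sum_anchor).map(String::as_str), Some("sum"));
    assert_eq!(roundtrip.functions.get(&substr_anchor).map(String::as_str), Some("substring"));
    assert_eq!(roundtrip.types.get(&type_anchor).map(String::as_str), Some("some_custom_type"));
    Ok(())
}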
b/datafusion/substrait/src/lib.rs index 454f0e7b7cb99..0b1c796553c0a 100644 --- a/datafusion/substrait/src/lib.rs +++ b/datafusion/substrait/src/lib.rs @@ -72,6 +72,7 @@ //! # Ok(()) //! # } //! ``` +pub mod extensions; pub mod logical_plan; pub mod physical_plan; pub mod serializer; diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 1365630d5079a..5768c44bbf6c8 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -36,16 +36,21 @@ use datafusion::logical_expr::{ use substrait::proto::expression::subquery::set_predicate::PredicateOp; use url::Url; +use crate::extensions::Extensions; use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, - INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; +#[allow(deprecated)] +use crate::variation_const::{ + INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, +}; use datafusion::common::scalar::ScalarStructBuilder; use datafusion::logical_expr::expr::InList; use datafusion::logical_expr::{ @@ -65,7 +70,9 @@ use std::str::FromStr; use std::sync::Arc; use substrait::proto::exchange_rel::ExchangeKind; use substrait::proto::expression::literal::user_defined::Val; -use substrait::proto::expression::literal::{IntervalDayToSecond, IntervalYearToMonth}; +use substrait::proto::expression::literal::{ + IntervalDayToSecond, IntervalYearToMonth, UserDefined, +}; use substrait::proto::expression::subquery::SubqueryType; use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction}; use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile; @@ -78,7 +85,6 @@ use substrait::proto::{ window_function::bound::Kind as BoundKind, window_function::Bound, window_function::BoundsType, MaskExpression, RexType, }, - extensions::simple_extension_declaration::MappingType, function_argument::ArgType, join_rel, plan_rel, r#type, read_rel::ReadType, @@ -185,19 +191,10 @@ pub async fn from_substrait_plan( plan: &Plan, ) -> Result { // Register function extension - let function_extension = plan - .extensions - .iter() - .map(|e| match &e.mapping_type { - Some(ext) => match ext { - MappingType::ExtensionFunction(ext_f) => { - Ok((ext_f.function_anchor, &ext_f.name)) - } - _ => not_impl_err!("Extension type not supported: {ext:?}"), - }, - None => not_impl_err!("Cannot parse empty extension"), - }) - .collect::>>()?; + let extensions = Extensions::try_from(&plan.extensions)?; + if !extensions.type_variations.is_empty() { + return not_impl_err!("Type variation extensions are not supported"); + } // Parse relations match plan.relations.len() { @@ -205,10 +202,10 @@ pub async fn from_substrait_plan( match plan.relations[0].rel_type.as_ref() { Some(rt) => match rt { plan_rel::RelType::Rel(rel) => { - Ok(from_substrait_rel(ctx, rel, &function_extension).await?) + Ok(from_substrait_rel(ctx, rel, &extensions).await?) 
}, plan_rel::RelType::Root(root) => { - let plan = from_substrait_rel(ctx, root.input.as_ref().unwrap(), &function_extension).await?; + let plan = from_substrait_rel(ctx, root.input.as_ref().unwrap(), &extensions).await?; if root.names.is_empty() { // Backwards compatibility for plans missing names return Ok(plan); @@ -396,7 +393,7 @@ fn make_renamed_schema( pub async fn from_substrait_rel( ctx: &SessionContext, rel: &Rel, - extensions: &HashMap, + extensions: &Extensions, ) -> Result { match &rel.rel_type { Some(RelType::Project(p)) => { @@ -660,7 +657,7 @@ pub async fn from_substrait_rel( substrait_datafusion_err!("No base schema provided for Virtual Table") })?; - let schema = from_substrait_named_struct(base_schema)?; + let schema = from_substrait_named_struct(base_schema, extensions)?; if vt.values.is_empty() { return Ok(LogicalPlan::EmptyRelation(EmptyRelation { @@ -681,6 +678,7 @@ pub async fn from_substrait_rel( name_idx += 1; // top-level names are provided through schema Ok(Expr::Literal(from_substrait_literal( lit, + extensions, &base_schema.names, &mut name_idx, )?)) @@ -892,7 +890,7 @@ pub async fn from_substrait_sorts( ctx: &SessionContext, substrait_sorts: &Vec, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result> { let mut sorts: Vec = vec![]; for s in substrait_sorts { @@ -942,7 +940,7 @@ pub async fn from_substrait_rex_vec( ctx: &SessionContext, exprs: &Vec, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result> { let mut expressions: Vec = vec![]; for expr in exprs { @@ -957,7 +955,7 @@ pub async fn from_substrait_func_args( ctx: &SessionContext, arguments: &Vec, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result> { let mut args: Vec = vec![]; for arg in arguments { @@ -977,7 +975,7 @@ pub async fn from_substrait_agg_func( ctx: &SessionContext, f: &AggregateFunction, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, filter: Option>, order_by: Option>, distinct: bool, @@ -985,14 +983,14 @@ pub async fn from_substrait_agg_func( let args = from_substrait_func_args(ctx, &f.arguments, input_schema, extensions).await?; - let Some(function_name) = extensions.get(&f.function_reference) else { + let Some(function_name) = extensions.functions.get(&f.function_reference) else { return plan_err!( "Aggregate function not registered: function anchor = {:?}", f.function_reference ); }; - let function_name = substrait_fun_name((**function_name).as_str()); + let function_name = substrait_fun_name(function_name); // try udaf first, then built-in aggr fn. 
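For reference, the function lookups above boil down to a plain map access on the decoded `Extensions`; a small sketch of that pattern with a shortened error message, not the crate's exact code:

use datafusion::common::{plan_err, Result};
use datafusion_substrait::extensions::Extensions;

// A function_reference in a Substrait expression is only meaningful through the
// plan's extension mapping; an unknown anchor is a planning error, as above.
fn resolve_function_name(extensions: &Extensions, function_reference: u32) -> Result<&str> {
    match extensions.functions.get(&function_reference) {
        Some(name) => Ok(name.as_str()),
        None => plan_err!("Function not registered: function anchor = {:?}", function_reference),
    }
}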
if let Ok(fun) = ctx.udaf(function_name) { // deal with situation that count(*) got no arguments @@ -1025,7 +1023,7 @@ pub async fn from_substrait_rex( ctx: &SessionContext, e: &Expression, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result { match &e.rex_type { Some(RexType::SingularOrList(s)) => { @@ -1105,7 +1103,7 @@ pub async fn from_substrait_rex( })) } Some(RexType::ScalarFunction(f)) => { - let Some(fn_name) = extensions.get(&f.function_reference) else { + let Some(fn_name) = extensions.functions.get(&f.function_reference) else { return plan_err!( "Scalar function not found: function reference = {:?}", f.function_reference @@ -1155,7 +1153,7 @@ pub async fn from_substrait_rex( } } Some(RexType::Literal(lit)) => { - let scalar_value = from_substrait_literal_without_names(lit)?; + let scalar_value = from_substrait_literal_without_names(lit, extensions)?; Ok(Expr::Literal(scalar_value)) } Some(RexType::Cast(cast)) => match cast.as_ref().r#type.as_ref() { @@ -1169,12 +1167,13 @@ pub async fn from_substrait_rex( ) .await?, ), - from_substrait_type_without_names(output_type)?, + from_substrait_type_without_names(output_type, extensions)?, ))), None => substrait_err!("Cast expression without output type is not allowed"), }, Some(RexType::WindowFunction(window)) => { - let Some(fn_name) = extensions.get(&window.function_reference) else { + let Some(fn_name) = extensions.functions.get(&window.function_reference) + else { return plan_err!( "Window function not found: function reference = {:?}", window.function_reference @@ -1328,12 +1327,16 @@ pub async fn from_substrait_rex( } } -pub(crate) fn from_substrait_type_without_names(dt: &Type) -> Result { - from_substrait_type(dt, &[], &mut 0) +pub(crate) fn from_substrait_type_without_names( + dt: &Type, + extensions: &Extensions, +) -> Result { + from_substrait_type(dt, extensions, &[], &mut 0) } fn from_substrait_type( dt: &Type, + extensions: &Extensions, dfs_names: &[String], name_idx: &mut usize, ) -> Result { @@ -1416,7 +1419,7 @@ fn from_substrait_type( substrait_datafusion_err!("List type must have inner type") })?; let field = Arc::new(Field::new_list_field( - from_substrait_type(inner_type, dfs_names, name_idx)?, + from_substrait_type(inner_type, extensions, dfs_names, name_idx)?, // We ignore Substrait's nullability here to match to_substrait_literal // which always creates nullable lists true, @@ -1438,12 +1441,12 @@ fn from_substrait_type( })?; let key_field = Arc::new(Field::new( "key", - from_substrait_type(key_type, dfs_names, name_idx)?, + from_substrait_type(key_type, extensions, dfs_names, name_idx)?, false, )); let value_field = Arc::new(Field::new( "value", - from_substrait_type(value_type, dfs_names, name_idx)?, + from_substrait_type(value_type, extensions, dfs_names, name_idx)?, true, )); match map.type_variation_reference { @@ -1490,28 +1493,41 @@ fn from_substrait_type( ), }, r#type::Kind::UserDefined(u) => { - match u.type_reference { - // Kept for backwards compatibility, use IntervalYear instead - INTERVAL_YEAR_MONTH_TYPE_REF => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } - // Kept for backwards compatibility, use IntervalDay instead - INTERVAL_DAY_TIME_TYPE_REF => { - Ok(DataType::Interval(IntervalUnit::DayTime)) + if let Some(name) = extensions.types.get(&u.type_reference) { + match name.as_ref() { + INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), + _ => not_impl_err!( + "Unsupported Substrait user defined type with ref {} and 
variation {}", + u.type_reference, + u.type_variation_reference + ), } - // Not supported yet by Substrait - INTERVAL_MONTH_DAY_NANO_TYPE_REF => { - Ok(DataType::Interval(IntervalUnit::MonthDayNano)) - } - _ => not_impl_err!( + } else { + // Kept for backwards compatibility, new plans should include the extension instead + #[allow(deprecated)] + match u.type_reference { + // Kept for backwards compatibility, use IntervalYear instead + INTERVAL_YEAR_MONTH_TYPE_REF => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + // Kept for backwards compatibility, use IntervalDay instead + INTERVAL_DAY_TIME_TYPE_REF => { + Ok(DataType::Interval(IntervalUnit::DayTime)) + } + // Not supported yet by Substrait + INTERVAL_MONTH_DAY_NANO_TYPE_REF => { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } + _ => not_impl_err!( "Unsupported Substrait user defined type with ref {} and variation {}", u.type_reference, u.type_variation_reference ), + } } } r#type::Kind::Struct(s) => Ok(DataType::Struct(from_substrait_struct_type( - s, dfs_names, name_idx, + s, extensions, dfs_names, name_idx, )?)), r#type::Kind::Varchar(_) => Ok(DataType::Utf8), r#type::Kind::FixedChar(_) => Ok(DataType::Utf8), @@ -1523,6 +1539,7 @@ fn from_substrait_type( fn from_substrait_struct_type( s: &r#type::Struct, + extensions: &Extensions, dfs_names: &[String], name_idx: &mut usize, ) -> Result { @@ -1530,7 +1547,7 @@ fn from_substrait_struct_type( for (i, f) in s.types.iter().enumerate() { let field = Field::new( next_struct_field_name(i, dfs_names, name_idx)?, - from_substrait_type(f, dfs_names, name_idx)?, + from_substrait_type(f, extensions, dfs_names, name_idx)?, true, // We assume everything to be nullable since that's easier than ensuring it matches ); fields.push(field); @@ -1556,12 +1573,16 @@ fn next_struct_field_name( } } -fn from_substrait_named_struct(base_schema: &NamedStruct) -> Result { +fn from_substrait_named_struct( + base_schema: &NamedStruct, + extensions: &Extensions, +) -> Result { let mut name_idx = 0; let fields = from_substrait_struct_type( base_schema.r#struct.as_ref().ok_or_else(|| { substrait_datafusion_err!("Named struct must contain a struct") })?, + extensions, &base_schema.names, &mut name_idx, ); @@ -1621,12 +1642,16 @@ fn from_substrait_bound( } } -pub(crate) fn from_substrait_literal_without_names(lit: &Literal) -> Result { - from_substrait_literal(lit, &vec![], &mut 0) +pub(crate) fn from_substrait_literal_without_names( + lit: &Literal, + extensions: &Extensions, +) -> Result { + from_substrait_literal(lit, extensions, &vec![], &mut 0) } fn from_substrait_literal( lit: &Literal, + extensions: &Extensions, dfs_names: &Vec, name_idx: &mut usize, ) -> Result { @@ -1721,7 +1746,7 @@ fn from_substrait_literal( let elements = l .values .iter() - .map(|el| from_substrait_literal(el, dfs_names, name_idx)) + .map(|el| from_substrait_literal(el, extensions, dfs_names, name_idx)) .collect::>>()?; if elements.is_empty() { return substrait_err!( @@ -1744,6 +1769,7 @@ fn from_substrait_literal( Some(LiteralType::EmptyList(l)) => { let element_type = from_substrait_type( l.r#type.clone().unwrap().as_ref(), + extensions, dfs_names, name_idx, )?; @@ -1763,7 +1789,7 @@ fn from_substrait_literal( let mut builder = ScalarStructBuilder::new(); for (i, field) in s.fields.iter().enumerate() { let name = next_struct_field_name(i, dfs_names, name_idx)?; - let sv = from_substrait_literal(field, dfs_names, name_idx)?; + let sv = from_substrait_literal(field, extensions, dfs_names, name_idx)?; // We assume 
everything to be nullable, since Arrow's strict about things matching // and it's hard to match otherwise. builder = builder.with_scalar(Field::new(name, sv.data_type(), true), sv); @@ -1771,7 +1797,7 @@ fn from_substrait_literal( builder.build()? } Some(LiteralType::Null(ntype)) => { - from_substrait_null(ntype, dfs_names, name_idx)? + from_substrait_null(ntype, extensions, dfs_names, name_idx)? } Some(LiteralType::IntervalDayToSecond(IntervalDayToSecond { days, @@ -1786,40 +1812,9 @@ fn from_substrait_literal( } Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())), Some(LiteralType::UserDefined(user_defined)) => { - match user_defined.type_reference { - // Kept for backwards compatibility, use IntervalYearToMonth instead - INTERVAL_YEAR_MONTH_TYPE_REF => { - let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { - return substrait_err!("Interval year month value is empty"); - }; - let value_slice: [u8; 4] = - (*raw_val.value).try_into().map_err(|_| { - substrait_datafusion_err!( - "Failed to parse interval year month value" - ) - })?; - ScalarValue::IntervalYearMonth(Some(i32::from_le_bytes(value_slice))) - } - // Kept for backwards compatibility, use IntervalDayToSecond instead - INTERVAL_DAY_TIME_TYPE_REF => { - let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { - return substrait_err!("Interval day time value is empty"); - }; - let value_slice: [u8; 8] = - (*raw_val.value).try_into().map_err(|_| { - substrait_datafusion_err!( - "Failed to parse interval day time value" - ) - })?; - let days = i32::from_le_bytes(value_slice[0..4].try_into().unwrap()); - let milliseconds = - i32::from_le_bytes(value_slice[4..8].try_into().unwrap()); - ScalarValue::IntervalDayTime(Some(IntervalDayTime { - days, - milliseconds, - })) - } - INTERVAL_MONTH_DAY_NANO_TYPE_REF => { + // Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed + let interval_month_day_nano = + |user_defined: &UserDefined| -> Result { let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { return substrait_err!("Interval month day nano value is empty"); }; @@ -1834,17 +1829,76 @@ fn from_substrait_literal( let days = i32::from_le_bytes(value_slice[4..8].try_into().unwrap()); let nanoseconds = i64::from_le_bytes(value_slice[8..16].try_into().unwrap()); - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano { - months, - days, - nanoseconds, - })) - } - _ => { - return not_impl_err!( - "Unsupported Substrait user defined type with ref {}", - user_defined.type_reference + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNano { + months, + days, + nanoseconds, + }, + ))) + }; + + if let Some(name) = extensions.types.get(&user_defined.type_reference) { + match name.as_ref() { + INTERVAL_MONTH_DAY_NANO_TYPE_NAME => { + interval_month_day_nano(user_defined)? 
+ } + _ => { + return not_impl_err!( + "Unsupported Substrait user defined type with ref {} and name {}", + user_defined.type_reference, + name ) + } + } + } else { + // Kept for backwards compatibility - new plans should include extension instead + #[allow(deprecated)] + match user_defined.type_reference { + // Kept for backwards compatibility, use IntervalYearToMonth instead + INTERVAL_YEAR_MONTH_TYPE_REF => { + let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { + return substrait_err!("Interval year month value is empty"); + }; + let value_slice: [u8; 4] = + (*raw_val.value).try_into().map_err(|_| { + substrait_datafusion_err!( + "Failed to parse interval year month value" + ) + })?; + ScalarValue::IntervalYearMonth(Some(i32::from_le_bytes( + value_slice, + ))) + } + // Kept for backwards compatibility, use IntervalDayToSecond instead + INTERVAL_DAY_TIME_TYPE_REF => { + let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { + return substrait_err!("Interval day time value is empty"); + }; + let value_slice: [u8; 8] = + (*raw_val.value).try_into().map_err(|_| { + substrait_datafusion_err!( + "Failed to parse interval day time value" + ) + })?; + let days = + i32::from_le_bytes(value_slice[0..4].try_into().unwrap()); + let milliseconds = + i32::from_le_bytes(value_slice[4..8].try_into().unwrap()); + ScalarValue::IntervalDayTime(Some(IntervalDayTime { + days, + milliseconds, + })) + } + INTERVAL_MONTH_DAY_NANO_TYPE_REF => { + interval_month_day_nano(user_defined)? + } + _ => { + return not_impl_err!( + "Unsupported Substrait user defined type literal with ref {}", + user_defined.type_reference + ) + } } } } @@ -1856,6 +1910,7 @@ fn from_substrait_literal( fn from_substrait_null( null_type: &Type, + extensions: &Extensions, dfs_names: &[String], name_idx: &mut usize, ) -> Result { @@ -1940,6 +1995,7 @@ fn from_substrait_null( let field = Field::new_list_field( from_substrait_type( l.r#type.clone().unwrap().as_ref(), + extensions, dfs_names, name_idx, )?, @@ -1958,7 +2014,8 @@ fn from_substrait_null( } } r#type::Kind::Struct(s) => { - let fields = from_substrait_struct_type(s, dfs_names, name_idx)?; + let fields = + from_substrait_struct_type(s, extensions, dfs_names, name_idx)?; Ok(ScalarStructBuilder::new_null(fields)) } _ => not_impl_err!("Unsupported Substrait type for null: {kind:?}"), @@ -2012,7 +2069,7 @@ impl BuiltinExprBuilder { ctx: &SessionContext, f: &ScalarFunction, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result { match self.expr_name.as_str() { "like" => { @@ -2037,7 +2094,7 @@ impl BuiltinExprBuilder { fn_name: &str, f: &ScalarFunction, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result { if f.arguments.len() != 1 { return substrait_err!("Expect one argument for {fn_name} expr"); @@ -2071,7 +2128,7 @@ impl BuiltinExprBuilder { case_insensitive: bool, f: &ScalarFunction, input_schema: &DFSchema, - extensions: &HashMap, + extensions: &Extensions, ) -> Result { let fn_name = if case_insensitive { "ILIKE" } else { "LIKE" }; if f.arguments.len() != 2 && f.arguments.len() != 3 { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 0fd59d5280862..8f69cc5e218f6 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -16,7 +16,6 @@ // under the License. 
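The `IntervalMonthDayNano` user-defined literal above travels as 16 little-endian bytes: `months` as an `i32` in bytes 0..4, `days` as an `i32` in bytes 4..8 and `nanoseconds` as an `i64` in bytes 8..16. A self-contained sketch of that layout, using the same sample values as the new `custom_type_literal_extensions` test further below:

// Decode the 16-byte little-endian layout that the consumer code above expects.
fn decode_interval_month_day_nano(raw: &[u8]) -> Option<(i32, i32, i64)> {
    let bytes: [u8; 16] = raw.try_into().ok()?;
    let months = i32::from_le_bytes(bytes[0..4].try_into().unwrap());
    let days = i32::from_le_bytes(bytes[4..8].try_into().unwrap());
    let nanoseconds = i64::from_le_bytes(bytes[8..16].try_into().unwrap());
    Some((months, days, nanoseconds))
}

fn interval_layout_demo() {
    let mut raw = Vec::with_capacity(16);
    raw.extend_from_slice(&17i32.to_le_bytes());            // months
    raw.extend_from_slice(&25i32.to_le_bytes());            // days
    raw.extend_from_slice(&1_234_567_890i64.to_le_bytes()); // nanoseconds
    assert_eq!(decode_interval_month_day_nano(&raw), Some((17, 25, 1_234_567_890)));
}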
use itertools::Itertools; -use std::collections::HashMap; use std::ops::Deref; use std::sync::Arc; @@ -33,6 +32,16 @@ use datafusion::{ scalar::ScalarValue, }; +use crate::extensions::Extensions; +use crate::variation_const::{ + DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, + DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, + TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, + TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, +}; use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait}; use datafusion::common::{ exec_err, internal_err, not_impl_err, plan_err, substrait_datafusion_err, @@ -72,10 +81,6 @@ use substrait::{ ScalarFunction, SingularOrList, Subquery, WindowFunction as SubstraitWindowFunction, }, - extensions::{ - self, - simple_extension_declaration::{ExtensionFunction, MappingType}, - }, function_argument::ArgType, join_rel, plan_rel, r#type, read_rel::{NamedTable, ReadType}, @@ -90,39 +95,24 @@ use substrait::{ version, }; -use crate::variation_const::{ - DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, - DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, - DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, - INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_URL, - LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, - TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, - TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, -}; - /// Convert DataFusion LogicalPlan to Substrait Plan pub fn to_substrait_plan(plan: &LogicalPlan, ctx: &SessionContext) -> Result> { + let mut extensions = Extensions::default(); // Parse relation nodes - let mut extension_info: ( - Vec, - HashMap, - ) = (vec![], HashMap::new()); // Generate PlanRel(s) // Note: Only 1 relation tree is currently supported let plan_rels = vec![PlanRel { rel_type: Some(plan_rel::RelType::Root(RelRoot { - input: Some(*to_substrait_rel(plan, ctx, &mut extension_info)?), - names: to_substrait_named_struct(plan.schema())?.names, + input: Some(*to_substrait_rel(plan, ctx, &mut extensions)?), + names: to_substrait_named_struct(plan.schema(), &mut extensions)?.names, })), }]; - let (function_extensions, _) = extension_info; - // Return parsed plan Ok(Box::new(Plan { version: Some(version::version_with_producer("datafusion")), extension_uris: vec![], - extensions: function_extensions, + extensions: extensions.into(), relations: plan_rels, advanced_extensions: None, expected_type_urls: vec![], @@ -133,10 +123,7 @@ pub fn to_substrait_plan(plan: &LogicalPlan, ctx: &SessionContext) -> Result, - HashMap, - ), + extensions: &mut Extensions, ) -> Result> { match plan { LogicalPlan::TableScan(scan) => { @@ -187,7 +174,7 @@ pub fn to_substrait_rel( Ok(Box::new(Rel { rel_type: Some(RelType::Read(Box::new(ReadRel { common: None, - base_schema: Some(to_substrait_named_struct(&e.schema)?), + base_schema: Some(to_substrait_named_struct(&e.schema, extensions)?), filter: None, best_effort_filter: None, projection: None, @@ -206,10 +193,10 @@ pub fn to_substrait_rel( let fields = row .iter() .map(|v| match v { - Expr::Literal(sv) => to_substrait_literal(sv), + Expr::Literal(sv) => to_substrait_literal(sv, extensions), Expr::Alias(alias) => match alias.expr.as_ref() { // The 
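A hedged end-to-end sketch of how the reworked producer entry point shown here and the consumer fit together; the table, the query and the use of `into_optimized_plan` are illustrative, and the paths assume `datafusion-substrait` is used as an external crate:

use datafusion::prelude::*;
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
use datafusion_substrait::logical_plan::producer::to_substrait_plan;

// Round-trip a DataFusion plan through Substrait; every function (and, for
// IntervalMonthDayNano, the custom type) used by the plan ends up in
// `plan.extensions`, which the consumer decodes back into an `Extensions`.
async fn substrait_roundtrip_sketch() -> datafusion::common::Result<()> {
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t (a INT, b VARCHAR)").await?; // hypothetical table
    let df_plan = ctx
        .sql("SELECT b, sum(a) FROM t GROUP BY b")
        .await?
        .into_optimized_plan()?;

    let substrait_plan = to_substrait_plan(&df_plan, &ctx)?; // Box<substrait::proto::Plan>
    assert!(!substrait_plan.extensions.is_empty());          // at least the "sum" function

    let roundtrip_plan = from_substrait_plan(&ctx, &substrait_plan).await?;
    println!("{}", roundtrip_plan.display_indent());
    Ok(())
}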
schema gives us the names, so we can skip aliases - Expr::Literal(sv) => to_substrait_literal(sv), + Expr::Literal(sv) => to_substrait_literal(sv, extensions), _ => Err(substrait_datafusion_err!( "Only literal types can be aliased in Virtual Tables, got: {}", alias.expr.variant_name() )), @@ -225,7 +212,7 @@ pub fn to_substrait_rel( Ok(Box::new(Rel { rel_type: Some(RelType::Read(Box::new(ReadRel { common: None, - base_schema: Some(to_substrait_named_struct(&v.schema)?), + base_schema: Some(to_substrait_named_struct(&v.schema, extensions)?), filter: None, best_effort_filter: None, projection: None, @@ -238,25 +225,25 @@ pub fn to_substrait_rel( let expressions = p .expr .iter() - .map(|e| to_substrait_rex(ctx, e, p.input.schema(), 0, extension_info)) + .map(|e| to_substrait_rex(ctx, e, p.input.schema(), 0, extensions)) .collect::>>()?; Ok(Box::new(Rel { rel_type: Some(RelType::Project(Box::new(ProjectRel { common: None, - input: Some(to_substrait_rel(p.input.as_ref(), ctx, extension_info)?), + input: Some(to_substrait_rel(p.input.as_ref(), ctx, extensions)?), expressions, advanced_extension: None, }))), })) } LogicalPlan::Filter(filter) => { - let input = to_substrait_rel(filter.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(filter.input.as_ref(), ctx, extensions)?; let filter_expr = to_substrait_rex( ctx, &filter.predicate, filter.input.schema(), 0, - extension_info, + extensions, )?; Ok(Box::new(Rel { rel_type: Some(RelType::Filter(Box::new(FilterRel { @@ -268,7 +255,7 @@ pub fn to_substrait_rel( })) } LogicalPlan::Limit(limit) => { - let input = to_substrait_rel(limit.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(limit.input.as_ref(), ctx, extensions)?; // Since protobuf can't directly distinguish `None` vs `0` encode `None` as `MAX` let limit_fetch = limit.fetch.unwrap_or(usize::MAX); Ok(Box::new(Rel { @@ -282,13 +269,11 @@ pub fn to_substrait_rel( })) } LogicalPlan::Sort(sort) => { - let input = to_substrait_rel(sort.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(sort.input.as_ref(), ctx, extensions)?; let sort_fields = sort .expr .iter() - .map(|e| { - substrait_sort_field(ctx, e, sort.input.schema(), extension_info) - }) + .map(|e| substrait_sort_field(ctx, e, sort.input.schema(), extensions)) .collect::>>()?; Ok(Box::new(Rel { rel_type: Some(RelType::Sort(Box::new(SortRel { @@ -300,19 +285,17 @@ pub fn to_substrait_rel( })) } LogicalPlan::Aggregate(agg) => { - let input = to_substrait_rel(agg.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(agg.input.as_ref(), ctx, extensions)?; let groupings = to_substrait_groupings( ctx, &agg.group_expr, agg.input.schema(), - extension_info, + extensions, )?; let measures = agg .aggr_expr .iter() - .map(|e| { - to_substrait_agg_measure(ctx, e, agg.input.schema(), extension_info) - }) + .map(|e| to_substrait_agg_measure(ctx, e, agg.input.schema(), extensions)) .collect::>>()?; Ok(Box::new(Rel { @@ -327,7 +310,7 @@ pub fn to_substrait_rel( } LogicalPlan::Distinct(Distinct::All(plan)) => { // Use Substrait's AggregateRel with empty measures to represent `select distinct` - let input = to_substrait_rel(plan.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(plan.as_ref(), ctx, extensions)?; // Get grouping keys from the input relation's number of output fields let grouping = (0..plan.schema().fields().len()) .map(substrait_field_ref) @@ -346,8 +329,8 @@ pub fn to_substrait_rel( })) } LogicalPlan::Join(join) => { - let left = 
to_substrait_rel(join.left.as_ref(), ctx, extension_info)?; - let right = to_substrait_rel(join.right.as_ref(), ctx, extension_info)?; + let left = to_substrait_rel(join.left.as_ref(), ctx, extensions)?; + let right = to_substrait_rel(join.right.as_ref(), ctx, extensions)?; let join_type = to_substrait_jointype(join.join_type); // we only support basic joins so return an error for anything not yet supported match join.join_constraint { @@ -364,7 +347,7 @@ pub fn to_substrait_rel( filter, &Arc::new(in_join_schema), 0, - extension_info, + extensions, )?), None => None, }; @@ -382,7 +365,7 @@ pub fn to_substrait_rel( eq_op, join.left.schema(), join.right.schema(), - extension_info, + extensions, )?; // create conjunction between `join_on` and `join_filter` to embed all join conditions, @@ -393,7 +376,7 @@ pub fn to_substrait_rel( on_expr, filter, Operator::And, - extension_info, + extensions, ))), None => join_on.map(Box::new), // the join expression will only contain `join_on` if filter doesn't exist }, @@ -421,8 +404,8 @@ pub fn to_substrait_rel( right, schema: _, } = cross_join; - let left = to_substrait_rel(left.as_ref(), ctx, extension_info)?; - let right = to_substrait_rel(right.as_ref(), ctx, extension_info)?; + let left = to_substrait_rel(left.as_ref(), ctx, extensions)?; + let right = to_substrait_rel(right.as_ref(), ctx, extensions)?; Ok(Box::new(Rel { rel_type: Some(RelType::Cross(Box::new(CrossRel { common: None, @@ -435,13 +418,13 @@ pub fn to_substrait_rel( LogicalPlan::SubqueryAlias(alias) => { // Do nothing if encounters SubqueryAlias // since there is no corresponding relation type in Substrait - to_substrait_rel(alias.input.as_ref(), ctx, extension_info) + to_substrait_rel(alias.input.as_ref(), ctx, extensions) } LogicalPlan::Union(union) => { let input_rels = union .inputs .iter() - .map(|input| to_substrait_rel(input.as_ref(), ctx, extension_info)) + .map(|input| to_substrait_rel(input.as_ref(), ctx, extensions)) .collect::>>()? 
.into_iter() .map(|ptr| *ptr) @@ -456,7 +439,7 @@ pub fn to_substrait_rel( })) } LogicalPlan::Window(window) => { - let input = to_substrait_rel(window.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(window.input.as_ref(), ctx, extensions)?; // If the input is a Project relation, we can just append the WindowFunction expressions // before returning // Otherwise, wrap the input in a Project relation before appending the WindowFunction @@ -484,7 +467,7 @@ pub fn to_substrait_rel( expr, window.input.schema(), 0, - extension_info, + extensions, )?); } // Append parsed WindowFunction expressions @@ -494,8 +477,7 @@ pub fn to_substrait_rel( })) } LogicalPlan::Repartition(repartition) => { - let input = - to_substrait_rel(repartition.input.as_ref(), ctx, extension_info)?; + let input = to_substrait_rel(repartition.input.as_ref(), ctx, extensions)?; let partition_count = match repartition.partitioning_scheme { Partitioning::RoundRobinBatch(num) => num, Partitioning::Hash(_, num) => num, @@ -553,7 +535,7 @@ pub fn to_substrait_rel( .node .inputs() .into_iter() - .map(|plan| to_substrait_rel(plan, ctx, extension_info)) + .map(|plan| to_substrait_rel(plan, ctx, extensions)) .collect::>>()?; let rel_type = match inputs_rel.len() { 0 => RelType::ExtensionLeaf(ExtensionLeafRel { @@ -579,7 +561,10 @@ pub fn to_substrait_rel( } } -fn to_substrait_named_struct(schema: &DFSchemaRef) -> Result { +fn to_substrait_named_struct( + schema: &DFSchemaRef, + extensions: &mut Extensions, +) -> Result { // Substrait wants a list of all field names, including nested fields from structs, // also from within e.g. lists and maps. However, it does not want the list and map field names // themselves - only proper structs fields are considered to have useful names. 
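The comment above describes the naming convention behind `to_substrait_named_struct`: every struct field name is listed depth-first, including struct fields nested inside lists, while the synthetic list item names are skipped. A simplified, self-contained sketch of that rule (maps are left out for brevity, and this is not the crate's actual helper):

use datafusion::arrow::datatypes::{DataType, Field, Fields};
use std::sync::Arc;

// Collect names the way the comment above describes: struct field names in
// depth-first order, without the synthetic name of a list's item field.
fn collect_names(fields: &Fields, out: &mut Vec<String>) {
    for field in fields.iter() {
        out.push(field.name().clone());
        collect_nested(field.data_type(), out);
    }
}

fn collect_nested(dt: &DataType, out: &mut Vec<String>) {
    match dt {
        DataType::Struct(children) => collect_names(children, out),
        DataType::List(item) | DataType::LargeList(item) => {
            collect_nested(item.data_type(), out) // the item field itself contributes no name
        }
        _ => {}
    }
}

fn names_demo() {
    let inner_struct =
        DataType::Struct(Fields::from(vec![Field::new("z", DataType::Int64, true)]));
    let fields = Fields::from(vec![
        Field::new("a", DataType::Int32, true),
        Field::new(
            "b",
            DataType::Struct(Fields::from(vec![
                Field::new("x", DataType::Utf8, true),
                Field::new(
                    "y",
                    DataType::List(Arc::new(Field::new_list_field(inner_struct, true))),
                    true,
                ),
            ])),
            true,
        ),
    ]);
    let mut names = vec![];
    collect_names(&fields, &mut names);
    assert_eq!(names, ["a", "b", "x", "y", "z"]);
}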
@@ -624,7 +609,7 @@ fn to_substrait_named_struct(schema: &DFSchemaRef) -> Result { types: schema .fields() .iter() - .map(|f| to_substrait_type(f.data_type(), f.is_nullable())) + .map(|f| to_substrait_type(f.data_type(), f.is_nullable(), extensions)) .collect::>()?, type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability: r#type::Nullability::Unspecified as i32, @@ -642,30 +627,27 @@ fn to_substrait_join_expr( eq_op: Operator, left_schema: &DFSchemaRef, right_schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result> { // Only support AND conjunction for each binary expression in join conditions let mut exprs: Vec = vec![]; for (left, right) in join_conditions { // Parse left - let l = to_substrait_rex(ctx, left, left_schema, 0, extension_info)?; + let l = to_substrait_rex(ctx, left, left_schema, 0, extensions)?; // Parse right let r = to_substrait_rex( ctx, right, right_schema, left_schema.fields().len(), // offset to return the correct index - extension_info, + extensions, )?; // AND with existing expression - exprs.push(make_binary_op_scalar_func(&l, &r, eq_op, extension_info)); + exprs.push(make_binary_op_scalar_func(&l, &r, eq_op, extensions)); } let join_expr: Option = exprs.into_iter().reduce(|acc: Expression, e: Expression| { - make_binary_op_scalar_func(&acc, &e, Operator::And, extension_info) + make_binary_op_scalar_func(&acc, &e, Operator::And, extensions) }); Ok(join_expr) } @@ -722,14 +704,11 @@ pub fn parse_flat_grouping_exprs( ctx: &SessionContext, exprs: &[Expr], schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { let grouping_expressions = exprs .iter() - .map(|e| to_substrait_rex(ctx, e, schema, 0, extension_info)) + .map(|e| to_substrait_rex(ctx, e, schema, 0, extensions)) .collect::>>()?; Ok(Grouping { grouping_expressions, @@ -740,10 +719,7 @@ pub fn to_substrait_groupings( ctx: &SessionContext, exprs: &[Expr], schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result> { match exprs.len() { 1 => match &exprs[0] { @@ -753,9 +729,7 @@ pub fn to_substrait_groupings( )), GroupingSet::GroupingSets(sets) => Ok(sets .iter() - .map(|set| { - parse_flat_grouping_exprs(ctx, set, schema, extension_info) - }) + .map(|set| parse_flat_grouping_exprs(ctx, set, schema, extensions)) .collect::>>()?), GroupingSet::Rollup(set) => { let mut sets: Vec> = vec![vec![]]; @@ -766,23 +740,17 @@ pub fn to_substrait_groupings( .iter() .rev() .map(|set| { - parse_flat_grouping_exprs(ctx, set, schema, extension_info) + parse_flat_grouping_exprs(ctx, set, schema, extensions) }) .collect::>>()?) } }, _ => Ok(vec![parse_flat_grouping_exprs( - ctx, - exprs, - schema, - extension_info, + ctx, exprs, schema, extensions, )?]), }, _ => Ok(vec![parse_flat_grouping_exprs( - ctx, - exprs, - schema, - extension_info, + ctx, exprs, schema, extensions, )?]), } } @@ -792,25 +760,22 @@ pub fn to_substrait_agg_measure( ctx: &SessionContext, expr: &Expr, schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { match expr { Expr::AggregateFunction(expr::AggregateFunction { func_def, args, distinct, filter, order_by, null_treatment: _, }) => { match func_def { AggregateFunctionDefinition::BuiltIn (fun) => { let sorts = if let Some(order_by) = order_by { - order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extension_info)).collect::>>()? 
+ order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extensions)).collect::>>()? } else { vec![] }; let mut arguments: Vec = vec![]; for arg in args { - arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extension_info)?)) }); + arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extensions)?)) }); } - let function_anchor = register_function(fun.to_string(), extension_info); + let function_anchor = extensions.register_function(fun.to_string()); Ok(Measure { measure: Some(AggregateFunction { function_reference: function_anchor, @@ -826,22 +791,22 @@ pub fn to_substrait_agg_measure( options: vec![], }), filter: match filter { - Some(f) => Some(to_substrait_rex(ctx, f, schema, 0, extension_info)?), + Some(f) => Some(to_substrait_rex(ctx, f, schema, 0, extensions)?), None => None } }) } AggregateFunctionDefinition::UDF(fun) => { let sorts = if let Some(order_by) = order_by { - order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extension_info)).collect::>>()? + order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extensions)).collect::>>()? } else { vec![] }; let mut arguments: Vec = vec![]; for arg in args { - arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extension_info)?)) }); + arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extensions)?)) }); } - let function_anchor = register_function(fun.name().to_string(), extension_info); + let function_anchor = extensions.register_function(fun.name().to_string()); Ok(Measure { measure: Some(AggregateFunction { function_reference: function_anchor, @@ -857,7 +822,7 @@ pub fn to_substrait_agg_measure( options: vec![], }), filter: match filter { - Some(f) => Some(to_substrait_rex(ctx, f, schema, 0, extension_info)?), + Some(f) => Some(to_substrait_rex(ctx, f, schema, 0, extensions)?), None => None } }) @@ -866,7 +831,7 @@ pub fn to_substrait_agg_measure( } Expr::Alias(Alias{expr,..})=> { - to_substrait_agg_measure(ctx, expr, schema, extension_info) + to_substrait_agg_measure(ctx, expr, schema, extensions) } _ => internal_err!( "Expression must be compatible with aggregation. Unsupported expression: {:?}. ExpressionType: {:?}", @@ -881,10 +846,7 @@ fn to_substrait_sort_field( ctx: &SessionContext, expr: &Expr, schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { match expr { Expr::Sort(sort) => { @@ -900,7 +862,7 @@ fn to_substrait_sort_field( sort.expr.deref(), schema, 0, - extension_info, + extensions, )?), sort_kind: Some(SortKind::Direction(sort_kind.into())), }) @@ -909,67 +871,15 @@ fn to_substrait_sort_field( } } -fn register_function( - function_name: String, - extension_info: &mut ( - Vec, - HashMap, - ), -) -> u32 { - let (function_extensions, function_set) = extension_info; - let function_name = function_name.to_lowercase(); - - // Some functions are named differently in Substrait default extensions than in DF - // Rename those to match the Substrait extensions for interoperability - let function_name = match function_name.as_str() { - "substr" => "substring".to_string(), - _ => function_name, - }; - - // To prevent ambiguous references between ScalarFunctions and AggregateFunctions, - // a plan-relative identifier starting from 0 is used as the function_anchor. 
- // The consumer is responsible for correctly registering - // mapping info stored in the extensions by the producer. - let function_anchor = match function_set.get(&function_name) { - Some(function_anchor) => { - // Function has been registered - *function_anchor - } - None => { - // Function has NOT been registered - let function_anchor = function_set.len() as u32; - function_set.insert(function_name.clone(), function_anchor); - - let function_extension = ExtensionFunction { - extension_uri_reference: u32::MAX, - function_anchor, - name: function_name, - }; - let simple_extension = extensions::SimpleExtensionDeclaration { - mapping_type: Some(MappingType::ExtensionFunction(function_extension)), - }; - function_extensions.push(simple_extension); - function_anchor - } - }; - - // Return function anchor - function_anchor -} - /// Return Substrait scalar function with two arguments #[allow(deprecated)] pub fn make_binary_op_scalar_func( lhs: &Expression, rhs: &Expression, op: Operator, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Expression { - let function_anchor = - register_function(operator_to_name(op).to_string(), extension_info); + let function_anchor = extensions.register_function(operator_to_name(op).to_string()); Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { function_reference: function_anchor, @@ -1010,17 +920,14 @@ pub fn make_binary_op_scalar_func( /// `col_ref(1) = col_ref(3 + 0)` /// , where `3` is the number of `left` columns (`col_ref_offset`) and `0` is the index /// of the join key column from `right` -/// * `extension_info` - Substrait extension info. Contains registered function information +/// * `extensions` - Substrait extension info. Contains registered function information #[allow(deprecated)] pub fn to_substrait_rex( ctx: &SessionContext, expr: &Expr, schema: &DFSchemaRef, col_ref_offset: usize, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { match expr { Expr::InList(InList { @@ -1030,10 +937,10 @@ pub fn to_substrait_rex( }) => { let substrait_list = list .iter() - .map(|x| to_substrait_rex(ctx, x, schema, col_ref_offset, extension_info)) + .map(|x| to_substrait_rex(ctx, x, schema, col_ref_offset, extensions)) .collect::>>()?; let substrait_expr = - to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?; let substrait_or_list = Expression { rex_type: Some(RexType::SingularOrList(Box::new(SingularOrList { @@ -1043,8 +950,7 @@ pub fn to_substrait_rex( }; if *negated { - let function_anchor = - register_function("not".to_string(), extension_info); + let function_anchor = extensions.register_function("not".to_string()); Ok(Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { @@ -1070,13 +976,12 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, )?)), }); } - let function_anchor = - register_function(fun.name().to_string(), extension_info); + let function_anchor = extensions.register_function(fun.name().to_string()); Ok(Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { function_reference: function_anchor, @@ -1096,58 +1001,58 @@ pub fn to_substrait_rex( if *negated { // `expr NOT BETWEEN low AND high` can be translated into (expr < low OR high < expr) let substrait_expr = - to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?; 
let substrait_low = - to_substrait_rex(ctx, low, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, low, schema, col_ref_offset, extensions)?; let substrait_high = - to_substrait_rex(ctx, high, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, high, schema, col_ref_offset, extensions)?; let l_expr = make_binary_op_scalar_func( &substrait_expr, &substrait_low, Operator::Lt, - extension_info, + extensions, ); let r_expr = make_binary_op_scalar_func( &substrait_high, &substrait_expr, Operator::Lt, - extension_info, + extensions, ); Ok(make_binary_op_scalar_func( &l_expr, &r_expr, Operator::Or, - extension_info, + extensions, )) } else { // `expr BETWEEN low AND high` can be translated into (low <= expr AND expr <= high) let substrait_expr = - to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?; let substrait_low = - to_substrait_rex(ctx, low, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, low, schema, col_ref_offset, extensions)?; let substrait_high = - to_substrait_rex(ctx, high, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, high, schema, col_ref_offset, extensions)?; let l_expr = make_binary_op_scalar_func( &substrait_low, &substrait_expr, Operator::LtEq, - extension_info, + extensions, ); let r_expr = make_binary_op_scalar_func( &substrait_expr, &substrait_high, Operator::LtEq, - extension_info, + extensions, ); Ok(make_binary_op_scalar_func( &l_expr, &r_expr, Operator::And, - extension_info, + extensions, )) } } @@ -1156,10 +1061,10 @@ pub fn to_substrait_rex( substrait_field_ref(index + col_ref_offset) } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let l = to_substrait_rex(ctx, left, schema, col_ref_offset, extension_info)?; - let r = to_substrait_rex(ctx, right, schema, col_ref_offset, extension_info)?; + let l = to_substrait_rex(ctx, left, schema, col_ref_offset, extensions)?; + let r = to_substrait_rex(ctx, right, schema, col_ref_offset, extensions)?; - Ok(make_binary_op_scalar_func(&l, &r, *op, extension_info)) + Ok(make_binary_op_scalar_func(&l, &r, *op, extensions)) } Expr::Case(Case { expr, @@ -1176,7 +1081,7 @@ pub fn to_substrait_rex( e, schema, col_ref_offset, - extension_info, + extensions, )?), then: None, }); @@ -1189,14 +1094,14 @@ pub fn to_substrait_rex( r#if, schema, col_ref_offset, - extension_info, + extensions, )?), then: Some(to_substrait_rex( ctx, then, schema, col_ref_offset, - extension_info, + extensions, )?), }); } @@ -1208,7 +1113,7 @@ pub fn to_substrait_rex( e, schema, col_ref_offset, - extension_info, + extensions, )?)), None => None, }; @@ -1221,22 +1126,22 @@ pub fn to_substrait_rex( Ok(Expression { rex_type: Some(RexType::Cast(Box::new( substrait::proto::expression::Cast { - r#type: Some(to_substrait_type(data_type, true)?), + r#type: Some(to_substrait_type(data_type, true, extensions)?), input: Some(Box::new(to_substrait_rex( ctx, expr, schema, col_ref_offset, - extension_info, + extensions, )?)), failure_behavior: 0, // FAILURE_BEHAVIOR_UNSPECIFIED }, ))), }) } - Expr::Literal(value) => to_substrait_literal_expr(value), + Expr::Literal(value) => to_substrait_literal_expr(value, extensions), Expr::Alias(Alias { expr, .. 
}) => { - to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info) + to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions) } Expr::WindowFunction(WindowFunction { fun, @@ -1247,7 +1152,7 @@ pub fn to_substrait_rex( null_treatment: _, }) => { // function reference - let function_anchor = register_function(fun.to_string(), extension_info); + let function_anchor = extensions.register_function(fun.to_string()); // arguments let mut arguments: Vec = vec![]; for arg in args { @@ -1257,19 +1162,19 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, )?)), }); } // partition by expressions let partition_by = partition_by .iter() - .map(|e| to_substrait_rex(ctx, e, schema, col_ref_offset, extension_info)) + .map(|e| to_substrait_rex(ctx, e, schema, col_ref_offset, extensions)) .collect::>>()?; // order by expressions let order_by = order_by .iter() - .map(|e| substrait_sort_field(ctx, e, schema, extension_info)) + .map(|e| substrait_sort_field(ctx, e, schema, extensions)) .collect::>>()?; // window frame let bounds = to_substrait_bounds(window_frame)?; @@ -1298,7 +1203,7 @@ pub fn to_substrait_rex( *escape_char, schema, col_ref_offset, - extension_info, + extensions, ), Expr::InSubquery(InSubquery { expr, @@ -1306,10 +1211,10 @@ pub fn to_substrait_rex( negated, }) => { let substrait_expr = - to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info)?; + to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?; let subquery_plan = - to_substrait_rel(subquery.subquery.as_ref(), ctx, extension_info)?; + to_substrait_rel(subquery.subquery.as_ref(), ctx, extensions)?; let substrait_subquery = Expression { rex_type: Some(RexType::Subquery(Box::new(Subquery { @@ -1324,8 +1229,7 @@ pub fn to_substrait_rex( }))), }; if *negated { - let function_anchor = - register_function("not".to_string(), extension_info); + let function_anchor = extensions.register_function("not".to_string()); Ok(Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { @@ -1348,7 +1252,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsNull(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1356,7 +1260,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsNotNull(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1364,7 +1268,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsTrue(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1372,7 +1276,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsFalse(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1380,7 +1284,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsUnknown(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1388,7 +1292,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsNotTrue(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1396,7 +1300,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsNotFalse(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1404,7 +1308,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::IsNotUnknown(arg) => to_substrait_unary_scalar_fn( ctx, @@ -1412,7 +1316,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), Expr::Negative(arg) => 
to_substrait_unary_scalar_fn( ctx, @@ -1420,7 +1324,7 @@ pub fn to_substrait_rex( arg, schema, col_ref_offset, - extension_info, + extensions, ), _ => { not_impl_err!("Unsupported expression: {expr:?}") @@ -1428,7 +1332,11 @@ pub fn to_substrait_rex( } } -fn to_substrait_type(dt: &DataType, nullable: bool) -> Result { +fn to_substrait_type( + dt: &DataType, + nullable: bool, + extensions: &mut Extensions, +) -> Result { let nullability = if nullable { r#type::Nullability::Nullable as i32 } else { @@ -1548,7 +1456,9 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Result { - let inner_type = to_substrait_type(inner.data_type(), inner.is_nullable())?; + let inner_type = + to_substrait_type(inner.data_type(), inner.is_nullable(), extensions)?; Ok(substrait::proto::Type { kind: Some(r#type::Kind::List(Box::new(r#type::List { r#type: Some(Box::new(inner_type)), @@ -1599,7 +1510,8 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result { - let inner_type = to_substrait_type(inner.data_type(), inner.is_nullable())?; + let inner_type = + to_substrait_type(inner.data_type(), inner.is_nullable(), extensions)?; Ok(substrait::proto::Type { kind: Some(r#type::Kind::List(Box::new(r#type::List { r#type: Some(Box::new(inner_type)), @@ -1613,10 +1525,12 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Result { let field_types = fields .iter() - .map(|field| to_substrait_type(field.data_type(), field.is_nullable())) + .map(|field| { + to_substrait_type(field.data_type(), field.is_nullable(), extensions) + }) .collect::>>()?; Ok(substrait::proto::Type { kind: Some(r#type::Kind::Struct(r#type::Struct { @@ -1700,21 +1616,19 @@ fn make_substrait_like_expr( escape_char: Option, schema: &DFSchemaRef, col_ref_offset: usize, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { let function_anchor = if ignore_case { - register_function("ilike".to_string(), extension_info) + extensions.register_function("ilike".to_string()) } else { - register_function("like".to_string(), extension_info) + extensions.register_function("like".to_string()) }; - let expr = to_substrait_rex(ctx, expr, schema, col_ref_offset, extension_info)?; - let pattern = to_substrait_rex(ctx, pattern, schema, col_ref_offset, extension_info)?; - let escape_char = to_substrait_literal_expr(&ScalarValue::Utf8( - escape_char.map(|c| c.to_string()), - ))?; + let expr = to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?; + let pattern = to_substrait_rex(ctx, pattern, schema, col_ref_offset, extensions)?; + let escape_char = to_substrait_literal_expr( + &ScalarValue::Utf8(escape_char.map(|c| c.to_string())), + extensions, + )?; let arguments = vec![ FunctionArgument { arg_type: Some(ArgType::Value(expr)), @@ -1738,7 +1652,7 @@ fn make_substrait_like_expr( }; if negated { - let function_anchor = register_function("not".to_string(), extension_info); + let function_anchor = extensions.register_function("not".to_string()); Ok(Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { @@ -1870,7 +1784,10 @@ fn to_substrait_bounds(window_frame: &WindowFrame) -> Result<(Bound, Bound)> { )) } -fn to_substrait_literal(value: &ScalarValue) -> Result { +fn to_substrait_literal( + value: &ScalarValue, + extensions: &mut Extensions, +) -> Result { if value.is_null() { return Ok(Literal { nullable: true, @@ -1878,6 +1795,7 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { literal_type: Some(LiteralType::Null(to_substrait_type( &value.data_type(), true, + 
extensions, )?)), }); } @@ -1949,14 +1867,15 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { let bytes = i.to_byte_slice(); ( LiteralType::UserDefined(UserDefined { - type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF, + type_reference: extensions + .register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()), type_parameters: vec![], val: Some(user_defined::Val::Value(ProtoAny { - type_url: INTERVAL_MONTH_DAY_NANO_TYPE_URL.to_string(), + type_url: INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(), value: bytes.to_vec().into(), })), }), - INTERVAL_MONTH_DAY_NANO_TYPE_REF, + DEFAULT_TYPE_VARIATION_REF, ) } ScalarValue::IntervalDayTime(Some(i)) => ( @@ -1996,11 +1915,11 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { DECIMAL_128_TYPE_VARIATION_REF, ), ScalarValue::List(l) => ( - convert_array_to_literal_list(l)?, + convert_array_to_literal_list(l, extensions)?, DEFAULT_CONTAINER_TYPE_VARIATION_REF, ), ScalarValue::LargeList(l) => ( - convert_array_to_literal_list(l)?, + convert_array_to_literal_list(l, extensions)?, LARGE_CONTAINER_TYPE_VARIATION_REF, ), ScalarValue::Struct(s) => ( @@ -2009,7 +1928,10 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { .columns() .iter() .map(|col| { - to_substrait_literal(&ScalarValue::try_from_array(col, 0)?) + to_substrait_literal( + &ScalarValue::try_from_array(col, 0)?, + extensions, + ) }) .collect::>>()?, }), @@ -2030,16 +1952,26 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { fn convert_array_to_literal_list( array: &GenericListArray, + extensions: &mut Extensions, ) -> Result { assert_eq!(array.len(), 1); let nested_array = array.value(0); let values = (0..nested_array.len()) - .map(|i| to_substrait_literal(&ScalarValue::try_from_array(&nested_array, i)?)) + .map(|i| { + to_substrait_literal( + &ScalarValue::try_from_array(&nested_array, i)?, + extensions, + ) + }) .collect::>>()?; if values.is_empty() { - let et = match to_substrait_type(array.data_type(), array.is_nullable())? { + let et = match to_substrait_type( + array.data_type(), + array.is_nullable(), + extensions, + )? 
{ substrait::proto::Type { kind: Some(r#type::Kind::List(lt)), } => lt.as_ref().to_owned(), @@ -2051,8 +1983,11 @@ fn convert_array_to_literal_list( } } -fn to_substrait_literal_expr(value: &ScalarValue) -> Result { - let literal = to_substrait_literal(value)?; +fn to_substrait_literal_expr( + value: &ScalarValue, + extensions: &mut Extensions, +) -> Result { + let literal = to_substrait_literal(value, extensions)?; Ok(Expression { rex_type: Some(RexType::Literal(literal)), }) @@ -2065,14 +2000,10 @@ fn to_substrait_unary_scalar_fn( arg: &Expr, schema: &DFSchemaRef, col_ref_offset: usize, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { - let function_anchor = register_function(fn_name.to_string(), extension_info); - let substrait_expr = - to_substrait_rex(ctx, arg, schema, col_ref_offset, extension_info)?; + let function_anchor = extensions.register_function(fn_name.to_string()); + let substrait_expr = to_substrait_rex(ctx, arg, schema, col_ref_offset, extensions)?; Ok(Expression { rex_type: Some(RexType::ScalarFunction(ScalarFunction { @@ -2116,10 +2047,7 @@ fn substrait_sort_field( ctx: &SessionContext, expr: &Expr, schema: &DFSchemaRef, - extension_info: &mut ( - Vec, - HashMap, - ), + extensions: &mut Extensions, ) -> Result { match expr { Expr::Sort(Sort { @@ -2127,7 +2055,7 @@ fn substrait_sort_field( asc, nulls_first, }) => { - let e = to_substrait_rex(ctx, expr, schema, 0, extension_info)?; + let e = to_substrait_rex(ctx, expr, schema, 0, extensions)?; let d = match (asc, nulls_first) { (true, true) => SortDirection::AscNullsFirst, (true, false) => SortDirection::AscNullsLast, @@ -2161,6 +2089,7 @@ fn substrait_field_ref(index: usize) -> Result { #[cfg(test)] mod test { + use super::*; use crate::logical_plan::consumer::{ from_substrait_literal_without_names, from_substrait_type_without_names, }; @@ -2168,8 +2097,7 @@ mod test { use datafusion::arrow::array::GenericListArray; use datafusion::arrow::datatypes::Field; use datafusion::common::scalar::ScalarStructBuilder; - - use super::*; + use std::collections::HashMap; #[test] fn round_trip_literals() -> Result<()> { @@ -2258,12 +2186,47 @@ mod test { fn round_trip_literal(scalar: ScalarValue) -> Result<()> { println!("Checking round trip of {scalar:?}"); - let substrait_literal = to_substrait_literal(&scalar)?; - let roundtrip_scalar = from_substrait_literal_without_names(&substrait_literal)?; + let mut extensions = Extensions::default(); + let substrait_literal = to_substrait_literal(&scalar, &mut extensions)?; + let roundtrip_scalar = + from_substrait_literal_without_names(&substrait_literal, &extensions)?; assert_eq!(scalar, roundtrip_scalar); Ok(()) } + #[test] + fn custom_type_literal_extensions() -> Result<()> { + let mut extensions = Extensions::default(); + // IntervalMonthDayNano is represented as a custom type in Substrait + let scalar = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new( + 17, 25, 1234567890, + ))); + let substrait_literal = to_substrait_literal(&scalar, &mut extensions)?; + let roundtrip_scalar = + from_substrait_literal_without_names(&substrait_literal, &extensions)?; + assert_eq!(scalar, roundtrip_scalar); + + assert_eq!( + extensions, + Extensions { + functions: HashMap::new(), + types: HashMap::from([( + 0, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string() + )]), + type_variations: HashMap::new(), + } + ); + + // Check we fail if we don't propagate extensions + assert!(from_substrait_literal_without_names( + &substrait_literal, + 
&Extensions::default() + ) + .is_err()); + Ok(()) + } + #[test] fn round_trip_types() -> Result<()> { round_trip_type(DataType::Boolean)?; @@ -2329,11 +2292,44 @@ mod test { fn round_trip_type(dt: DataType) -> Result<()> { println!("Checking round trip of {dt:?}"); + let mut extensions = Extensions::default(); + // As DataFusion doesn't consider nullability as a property of the type, but field, // it doesn't matter if we set nullability to true or false here. - let substrait = to_substrait_type(&dt, true)?; - let roundtrip_dt = from_substrait_type_without_names(&substrait)?; + let substrait = to_substrait_type(&dt, true, &mut extensions)?; + let roundtrip_dt = from_substrait_type_without_names(&substrait, &extensions)?; + assert_eq!(dt, roundtrip_dt); + Ok(()) + } + + #[test] + fn custom_type_extensions() -> Result<()> { + let mut extensions = Extensions::default(); + // IntervalMonthDayNano is represented as a custom type in Substrait + let dt = DataType::Interval(IntervalUnit::MonthDayNano); + + let substrait = to_substrait_type(&dt, true, &mut extensions)?; + let roundtrip_dt = from_substrait_type_without_names(&substrait, &extensions)?; assert_eq!(dt, roundtrip_dt); + + assert_eq!( + extensions, + Extensions { + functions: HashMap::new(), + types: HashMap::from([( + 0, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string() + )]), + type_variations: HashMap::new(), + } + ); + + // Check we fail if we don't propagate extensions + assert!( + from_substrait_type_without_names(&substrait, &Extensions::default()) + .is_err() + ); + Ok(()) } } diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index 27f4b3ea228a6..c94ad2d669fde 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -25,13 +25,16 @@ //! - Default type reference is 0. It is used when the actual type is the same with the original type. //! - Extended variant type references start from 1, and ususlly increase by 1. //! -//! Definitions here are not the final form. All the non-system-preferred variations will be defined +//! TODO: Definitions here are not the final form. All the non-system-preferred variations will be defined //! using [simple extensions] as per the [spec of type_variations](https://substrait.io/types/type_variations/) +//! //! //! [simple extensions]: (https://substrait.io/extensions/#simple-extensions) // For [type variations](https://substrait.io/types/type_variations/#type-variations) in substrait. // Type variations are used to represent different types based on one type class. +// TODO: Define as extensions: + /// The "system-preferred" variation (i.e., no variation). pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; @@ -55,6 +58,7 @@ pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1; /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval /// [`IntervalUnit::YearMonth`]: datafusion::arrow::datatypes::IntervalUnit::YearMonth /// [`ScalarValue::IntervalYearMonth`]: datafusion::common::ScalarValue::IntervalYearMonth +#[deprecated(since = "41.0.0", note = "Use Substrait `IntervalYear` type instead")] pub const INTERVAL_YEAR_MONTH_TYPE_REF: u32 = 1; /// For [`DataType::Interval`] with [`IntervalUnit::DayTime`]. 
@@ -68,6 +72,7 @@ pub const INTERVAL_YEAR_MONTH_TYPE_REF: u32 = 1; /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval /// [`IntervalUnit::DayTime`]: datafusion::arrow::datatypes::IntervalUnit::DayTime /// [`ScalarValue::IntervalDayTime`]: datafusion::common::ScalarValue::IntervalDayTime +#[deprecated(since = "41.0.0", note = "Use Substrait `IntervalDay` type instead")] pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2; /// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`]. @@ -82,21 +87,14 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2; /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval /// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano /// [`ScalarValue::IntervalMonthDayNano`]: datafusion::common::ScalarValue::IntervalMonthDayNano +#[deprecated( + since = "41.0.0", + note = "Use Substrait `UserDefinedType` with name `INTERVAL_MONTH_DAY_NANO_TYPE_NAME` instead" +)] pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3; -// For User Defined URLs -/// For [`DataType::Interval`] with [`IntervalUnit::YearMonth`]. -/// -/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval -/// [`IntervalUnit::YearMonth`]: datafusion::arrow::datatypes::IntervalUnit::YearMonth -pub const INTERVAL_YEAR_MONTH_TYPE_URL: &str = "interval-year-month"; -/// For [`DataType::Interval`] with [`IntervalUnit::DayTime`]. -/// -/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval -/// [`IntervalUnit::DayTime`]: datafusion::arrow::datatypes::IntervalUnit::DayTime -pub const INTERVAL_DAY_TIME_TYPE_URL: &str = "interval-day-time"; /// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`]. /// /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval /// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano -pub const INTERVAL_MONTH_DAY_NANO_TYPE_URL: &str = "interval-month-day-nano"; +pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano"; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index a7653e11d598f..5b4389c832c7c 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -38,7 +38,10 @@ use datafusion::optimizer::simplify_expressions::expr_simplifier::THRESHOLD_INLI use datafusion::prelude::*; use datafusion::execution::session_state::SessionStateBuilder; -use substrait::proto::extensions::simple_extension_declaration::MappingType; +use substrait::proto::extensions::simple_extension_declaration::{ + ExtensionType, MappingType, +}; +use substrait::proto::extensions::SimpleExtensionDeclaration; use substrait::proto::rel::RelType; use substrait::proto::{plan_rel, Plan, Rel}; @@ -175,15 +178,46 @@ async fn select_with_filter() -> Result<()> { #[tokio::test] async fn select_with_reused_functions() -> Result<()> { + let ctx = create_context().await?; let sql = "SELECT * FROM data WHERE a > 1 AND a < 10 AND b > 0"; - roundtrip(sql).await?; - let (mut function_names, mut function_anchors) = function_extension_info(sql).await?; - function_names.sort(); - function_anchors.sort(); + let proto = roundtrip_with_ctx(sql, ctx).await?; + let mut functions = proto + .extensions + .iter() + .map(|e| match e.mapping_type.as_ref().unwrap() { + MappingType::ExtensionFunction(ext_f) => { + (ext_f.function_anchor, ext_f.name.to_owned()) + } + _ => 
unreachable!("Non-function extensions not expected"), + }) + .collect::>(); + functions.sort_by_key(|(anchor, _)| *anchor); + + // Functions are encountered (and thus registered) depth-first + let expected = vec![ + (0, "gt".to_string()), + (1, "lt".to_string()), + (2, "and".to_string()), + ]; + assert_eq!(functions, expected); - assert_eq!(function_names, ["and", "gt", "lt"]); - assert_eq!(function_anchors, [0, 1, 2]); + Ok(()) +} +#[tokio::test] +async fn roundtrip_udt_extensions() -> Result<()> { + let ctx = create_context().await?; + let proto = + roundtrip_with_ctx("SELECT INTERVAL '1 YEAR 1 DAY 1 SECOND' FROM data", ctx) + .await?; + let expected_type = SimpleExtensionDeclaration { + mapping_type: Some(MappingType::ExtensionType(ExtensionType { + extension_uri_reference: u32::MAX, + type_anchor: 0, + name: "interval-month-day-nano".to_string(), + })), + }; + assert_eq!(proto.extensions, vec![expected_type]); Ok(()) } @@ -858,7 +892,8 @@ async fn roundtrip_aggregate_udf() -> Result<()> { let ctx = create_context().await?; ctx.register_udaf(dummy_agg); - roundtrip_with_ctx("select dummy_agg(a) from data", ctx).await + roundtrip_with_ctx("select dummy_agg(a) from data", ctx).await?; + Ok(()) } #[tokio::test] @@ -891,7 +926,8 @@ async fn roundtrip_window_udf() -> Result<()> { let ctx = create_context().await?; ctx.register_udwf(dummy_agg); - roundtrip_with_ctx("select dummy_window(a) OVER () from data", ctx).await + roundtrip_with_ctx("select dummy_window(a) OVER () from data", ctx).await?; + Ok(()) } #[tokio::test] @@ -1083,7 +1119,7 @@ async fn test_alias(sql_with_alias: &str, sql_no_alias: &str) -> Result<()> { Ok(()) } -async fn roundtrip_with_ctx(sql: &str, ctx: SessionContext) -> Result<()> { +async fn roundtrip_with_ctx(sql: &str, ctx: SessionContext) -> Result> { let df = ctx.sql(sql).await?; let plan = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan, &ctx)?; @@ -1102,56 +1138,25 @@ async fn roundtrip_with_ctx(sql: &str, ctx: SessionContext) -> Result<()> { assert_eq!(plan.schema(), plan2.schema()); DataFrame::new(ctx.state(), plan2).show().await?; - Ok(()) + Ok(proto) } async fn roundtrip(sql: &str) -> Result<()> { - roundtrip_with_ctx(sql, create_context().await?).await + roundtrip_with_ctx(sql, create_context().await?).await?; + Ok(()) } async fn roundtrip_verify_post_join_filter(sql: &str) -> Result<()> { let ctx = create_context().await?; - let df = ctx.sql(sql).await?; - let plan = df.into_optimized_plan()?; - let proto = to_substrait_plan(&plan, &ctx)?; - let plan2 = from_substrait_plan(&ctx, &proto).await?; - let plan2 = ctx.state().optimize(&plan2)?; - - println!("{plan:#?}"); - println!("{plan2:#?}"); - - let plan1str = format!("{plan:?}"); - let plan2str = format!("{plan2:?}"); - assert_eq!(plan1str, plan2str); - - assert_eq!(plan.schema(), plan2.schema()); + let proto = roundtrip_with_ctx(sql, ctx).await?; // verify that the join filters are None verify_post_join_filter_value(proto).await } async fn roundtrip_all_types(sql: &str) -> Result<()> { - roundtrip_with_ctx(sql, create_all_type_context().await?).await -} - -async fn function_extension_info(sql: &str) -> Result<(Vec, Vec)> { - let ctx = create_context().await?; - let df = ctx.sql(sql).await?; - let plan = df.into_optimized_plan()?; - let proto = to_substrait_plan(&plan, &ctx)?; - - let mut function_names: Vec = vec![]; - let mut function_anchors: Vec = vec![]; - for e in &proto.extensions { - let (function_anchor, function_name) = match e.mapping_type.as_ref().unwrap() { - 
MappingType::ExtensionFunction(ext_f) => (ext_f.function_anchor, &ext_f.name), - _ => unreachable!("Producer does not generate a non-function extension"), - }; - function_names.push(function_name.to_string()); - function_anchors.push(function_anchor); - } - - Ok((function_names, function_anchors)) + roundtrip_with_ctx(sql, create_all_type_context().await?).await?; + Ok(()) } async fn create_context() -> Result { From af0d2baf02e169760beedb8465ec1d9e5563d2be Mon Sep 17 00:00:00 2001 From: yfu Date: Sat, 20 Jul 2024 05:55:42 +1000 Subject: [PATCH 093/357] make unparser Dialect Send + Sync (#11504) --- datafusion/sql/src/unparser/dialect.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 87453f81ee3d8..1e82fc2b3c1ba 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -27,7 +27,7 @@ use sqlparser::{ast, keywords::ALL_KEYWORDS}; /// /// See /// See also the discussion in -pub trait Dialect { +pub trait Dialect: Send + Sync { /// Return the character used to quote identifiers. fn identifier_quote_style(&self, _identifier: &str) -> Option; From f1953528187828bc3636e90fa7d640d5cb3e54d1 Mon Sep 17 00:00:00 2001 From: yfu Date: Sat, 20 Jul 2024 05:56:28 +1000 Subject: [PATCH 094/357] fix: unparser generates wrong sql for derived table with columns (#17) (#11505) * fix unparser for derived table with columns * refactoring * renaming * case in tests --- datafusion/sql/src/unparser/plan.rs | 77 ++++++++++++++++++++--- datafusion/sql/tests/cases/plan_to_sql.rs | 29 +++++++++ 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 26fd472996376..7f050d8a0690e 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -19,7 +19,7 @@ use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, R use datafusion_expr::{ expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, Projection, }; -use sqlparser::ast::{self, SetExpr}; +use sqlparser::ast::{self, Ident, SetExpr}; use crate::unparser::utils::unproject_agg_exprs; @@ -457,15 +457,11 @@ impl Unparser<'_> { } LogicalPlan::SubqueryAlias(plan_alias) => { // Handle bottom-up to allocate relation - self.select_to_sql_recursively( - plan_alias.input.as_ref(), - query, - select, - relation, - )?; + let (plan, columns) = subquery_alias_inner_query_and_columns(plan_alias); + self.select_to_sql_recursively(plan, query, select, relation)?; relation.alias(Some( - self.new_table_alias(plan_alias.alias.table().to_string()), + self.new_table_alias(plan_alias.alias.table().to_string(), columns), )); Ok(()) @@ -599,10 +595,10 @@ impl Unparser<'_> { self.binary_op_to_sql(lhs, rhs, ast::BinaryOperator::And) } - fn new_table_alias(&self, alias: String) -> ast::TableAlias { + fn new_table_alias(&self, alias: String, columns: Vec) -> ast::TableAlias { ast::TableAlias { name: self.new_ident_quoted_if_needs(alias), - columns: Vec::new(), + columns, } } @@ -611,6 +607,67 @@ impl Unparser<'_> { } } +// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of +// subquery +// - `(SELECT column_a as a from table) AS A` +// - `(SELECT column_a from table) AS A (a)` +// +// A roundtrip example for table alias with columns +// +// query: SELECT id FROM (SELECT j1_id from j1) AS c (id) +// +// LogicPlan: +// Projection: c.id +// SubqueryAlias: c +// 
Projection: j1.j1_id AS id +// Projection: j1.j1_id +// TableScan: j1 +// +// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS +// id FROM (SELECT j1.j1_id FROM j1)) AS c`. +// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table +// `(SELECT j1.j1_id FROM j1)` +// +// With this logic, the unparsed query will be: +// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)` +// +// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)` +// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and +// Column in the Projections. Once the parser side is fixed, this logic should work +fn subquery_alias_inner_query_and_columns( + subquery_alias: &datafusion_expr::SubqueryAlias, +) -> (&LogicalPlan, Vec) { + let plan: &LogicalPlan = subquery_alias.input.as_ref(); + + let LogicalPlan::Projection(outer_projections) = plan else { + return (plan, vec![]); + }; + + // check if it's projection inside projection + let LogicalPlan::Projection(inner_projection) = outer_projections.input.as_ref() + else { + return (plan, vec![]); + }; + + let mut columns: Vec = vec![]; + // check if the inner projection and outer projection have a matching pattern like + // Projection: j1.j1_id AS id + // Projection: j1.j1_id + for (i, inner_expr) in inner_projection.expr.iter().enumerate() { + let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else { + return (plan, vec![]); + }; + + if outer_alias.expr.as_ref() != inner_expr { + return (plan, vec![]); + }; + + columns.push(outer_alias.name.as_str().into()); + } + + (outer_projections.input.as_ref(), columns) +} + impl From for DataFusionError { fn from(e: BuilderError) -> Self { DataFusionError::External(Box::new(e)) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 91295b2e8aae9..ed79a1dfc0c74 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -240,6 +240,35 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, + // more tests around subquery/derived table roundtrip + TestStatementWithDialect { + sql: "SELECT string_count FROM ( + SELECT + j1_id, + MIN(j2_string) + FROM + j1 LEFT OUTER JOIN j2 ON + j1_id = j2_id + GROUP BY + j1_id + ) AS agg (id, string_count) + ", + expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, MIN(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT j1_id from j1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT j1_id as id from j1) AS c", + expected: r#"SELECT c.id FROM (SELECT j1.j1_id AS id FROM j1) AS c"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, ]; for query in tests { From 9189a1acddbe0da9ab3cbdb3a317a6a45a561f41 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 19 Jul 2024 22:02:30 +0200 Subject: [PATCH 095/357] Prevent bigger files from being checked in (#11508) --- 
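Note: the check enumerates every Git object introduced by the pull request and fails if any blob exceeds the size limit. A rough local equivalent is sketched below; it is only an approximation, assuming `origin/main` as the merge base and a POSIX shell, whereas the workflow in this patch runs against the PR's recorded base and head SHAs.

    # List objects reachable from HEAD but not from the base branch.
    MAX_FILE_SIZE_BYTES=1048576
    git rev-list --objects origin/main..HEAD > new-objects.txt
    while read -r id path; do
        # Commits print no path and are skipped; anything with a path is
        # size-checked (trees are tiny, so only large blobs trip the limit).
        if [ -n "${path}" ]; then
            size="$(git cat-file -s "${id}")"
            if [ "${size}" -gt "${MAX_FILE_SIZE_BYTES}" ]; then
                echo "Object ${id} [${path}] has size ${size}, exceeding ${MAX_FILE_SIZE_BYTES} limit." >&2
            fi
        fi
    done < new-objects.txt
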
.github/workflows/large_files.yml | 55 +++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/large_files.yml diff --git a/.github/workflows/large_files.yml b/.github/workflows/large_files.yml new file mode 100644 index 0000000000000..aa96d55a0d851 --- /dev/null +++ b/.github/workflows/large_files.yml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Large files PR check + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +on: + pull_request: + +jobs: + check-files: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Check size of new Git objects + env: + # 1 MB ought to be enough for anybody. + # TODO in case we may want to consciously commit a bigger file to the repo without using Git LFS we may disable the check e.g. with a label + MAX_FILE_SIZE_BYTES: 1048576 + shell: bash + run: | + git rev-list --objects ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} \ + > pull-request-objects.txt + exit_code=0 + while read -r id path; do + # Skip objects which are not files (commits, trees) + if [ ! -z "${path}" ]; then + size="$(git cat-file -s "${id}")" + if [ "${size}" -gt "${MAX_FILE_SIZE_BYTES}" ]; then + exit_code=1 + echo "Object ${id} [${path}] has size ${size}, exceeding ${MAX_FILE_SIZE_BYTES} limit." >&2 + echo "::error file=${path}::File ${path} has size ${size}, exceeding ${MAX_FILE_SIZE_BYTES} limit." 
+ fi + fi + done < pull-request-objects.txt + exit "${exit_code}" From ebe61bae2aeda41b576c4a6e6fc96c5a502e7150 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Sat, 20 Jul 2024 04:22:31 +0800 Subject: [PATCH 096/357] fix: make `UnKnownColumn`s not equal to others physical exprs (#11536) * fix: fall back to `UnionExec` if can't interleave * alternative fix * check interleavable in with_new_children * link to pr --- .../physical-expr/src/expressions/column.rs | 10 ++--- datafusion/physical-plan/src/union.rs | 6 +++ datafusion/sqllogictest/test_files/union.slt | 45 +++++++++++++++++++ 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 38779c54607fb..ab43201ceb75b 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -21,7 +21,6 @@ use std::any::Any; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::{ @@ -95,11 +94,10 @@ impl PhysicalExpr for UnKnownColumn { } impl PartialEq for UnKnownColumn { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self == x) - .unwrap_or(false) + fn eq(&self, _other: &dyn Any) -> bool { + // UnknownColumn is not a valid expression, so it should not be equal to any other expression. + // See https://github.com/apache/datafusion/pull/11536 + false } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index b39c6aee82b98..24c80048ab4aa 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -431,6 +431,12 @@ impl ExecutionPlan for InterleaveExec { self: Arc, children: Vec>, ) -> Result> { + // New children are no longer interleavable, which might be a bug of optimization rewrite. 
+ if !can_interleave(children.iter()) { + return internal_err!( + "Can not create InterleaveExec: new children can not be interleaved" + ); + } Ok(Arc::new(InterleaveExec::try_new(children)?)) } diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 31b16f975e9ea..2dc8385bf191f 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -602,3 +602,48 @@ physical_plan 09)--ProjectionExec: expr=[1 as count, MAX(Int64(10))@0 as n] 10)----AggregateExec: mode=Single, gby=[], aggr=[MAX(Int64(10))] 11)------PlaceholderRowExec + + +# Test issue: https://github.com/apache/datafusion/issues/11409 +statement ok +CREATE TABLE t1(v0 BIGINT, v1 BIGINT, v2 BIGINT, v3 BOOLEAN); + +statement ok +CREATE TABLE t2(v0 DOUBLE); + +query I +INSERT INTO t1(v0, v2, v1) VALUES (-1229445667, -342312412, -1507138076); +---- +1 + +query I +INSERT INTO t1(v0, v1) VALUES (1541512604, -1229445667); +---- +1 + +query I +INSERT INTO t1(v1, v3, v0, v2) VALUES (-1020641465, false, -1493773377, 1751276473); +---- +1 + +query I +INSERT INTO t1(v3) VALUES (true), (true), (false); +---- +3 + +query I +INSERT INTO t2(v0) VALUES (0.28014577292925047); +---- +1 + +query II +SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 + UNION ALL +SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 WHERE (t1.v2 IS NULL); +---- + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; From 827d0e3a29c0ea34bafbf03f5102407bd8e9b826 Mon Sep 17 00:00:00 2001 From: Qianqian <130200611+Sevenannn@users.noreply.github.com> Date: Fri, 19 Jul 2024 13:23:32 -0700 Subject: [PATCH 097/357] Add dialect param to use double precision for float64 in Postgres (#11495) * Add dialect param to use double precision for float64 in Postgres * return ast data type instead of bool * Fix errors in merging * fix --- datafusion/sql/src/unparser/dialect.rs | 28 ++++++++++++++++++++++++ datafusion/sql/src/unparser/expr.rs | 30 +++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 1e82fc2b3c1ba..ed0cfddc38273 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -46,11 +46,18 @@ pub trait Dialect: Send + Sync { IntervalStyle::PostgresVerbose } + // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE? + // E.g. 
Postgres uses DOUBLE PRECISION instead of DOUBLE + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + sqlparser::ast::DataType::Double + } + // The SQL type to use for Arrow Utf8 unparsing // Most dialects use VARCHAR, but some, like MySQL, require CHAR fn utf8_cast_dtype(&self) -> ast::DataType { ast::DataType::Varchar(None) } + // The SQL type to use for Arrow LargeUtf8 unparsing // Most dialects use TEXT, but some, like MySQL, require CHAR fn large_utf8_cast_dtype(&self) -> ast::DataType { @@ -98,6 +105,10 @@ impl Dialect for PostgreSqlDialect { fn interval_style(&self) -> IntervalStyle { IntervalStyle::PostgresVerbose } + + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + sqlparser::ast::DataType::DoublePrecision + } } pub struct MySqlDialect {} @@ -137,6 +148,7 @@ pub struct CustomDialect { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + float64_ast_dtype: sqlparser::ast::DataType, utf8_cast_dtype: ast::DataType, large_utf8_cast_dtype: ast::DataType, } @@ -148,6 +160,7 @@ impl Default for CustomDialect { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::SQLStandard, + float64_ast_dtype: sqlparser::ast::DataType::Double, utf8_cast_dtype: ast::DataType::Varchar(None), large_utf8_cast_dtype: ast::DataType::Text, } @@ -182,6 +195,10 @@ impl Dialect for CustomDialect { self.interval_style } + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + self.float64_ast_dtype.clone() + } + fn utf8_cast_dtype(&self) -> ast::DataType { self.utf8_cast_dtype.clone() } @@ -210,6 +227,7 @@ pub struct CustomDialectBuilder { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + float64_ast_dtype: sqlparser::ast::DataType, utf8_cast_dtype: ast::DataType, large_utf8_cast_dtype: ast::DataType, } @@ -227,6 +245,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::PostgresVerbose, + float64_ast_dtype: sqlparser::ast::DataType::Double, utf8_cast_dtype: ast::DataType::Varchar(None), large_utf8_cast_dtype: ast::DataType::Text, } @@ -238,6 +257,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, use_timestamp_for_date64: self.use_timestamp_for_date64, interval_style: self.interval_style, + float64_ast_dtype: self.float64_ast_dtype, utf8_cast_dtype: self.utf8_cast_dtype, large_utf8_cast_dtype: self.large_utf8_cast_dtype, } @@ -273,6 +293,14 @@ impl CustomDialectBuilder { self } + pub fn with_float64_ast_dtype( + mut self, + float64_ast_dtype: sqlparser::ast::DataType, + ) -> Self { + self.float64_ast_dtype = float64_ast_dtype; + self + } + pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { self.utf8_cast_dtype = utf8_cast_dtype; self diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 950e7e11288a7..2f7854c1a183c 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1240,7 +1240,7 @@ impl Unparser<'_> { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } DataType::Float32 => Ok(ast::DataType::Float(None)), - DataType::Float64 => Ok(ast::DataType::Double), + DataType::Float64 => Ok(self.dialect.float64_ast_dtype()), DataType::Timestamp(_, tz) => { let tz_info = match tz { Some(_) => TimezoneInfo::WithTimeZone, @@ -1822,6 +1822,34 @@ mod tests { Ok(()) } + #[test] + fn 
custom_dialect_float64_ast_dtype() -> Result<()> { + for (float64_ast_dtype, identifier) in [ + (sqlparser::ast::DataType::Double, "DOUBLE"), + ( + sqlparser::ast::DataType::DoublePrecision, + "DOUBLE PRECISION", + ), + ] { + let dialect = CustomDialectBuilder::new() + .with_float64_ast_dtype(float64_ast_dtype) + .build(); + let unparser = Unparser::new(&dialect); + + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Float64, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + let expected = format!(r#"CAST(a AS {identifier})"#); + assert_eq!(actual, expected); + } + Ok(()) + } + #[test] fn customer_dialect_support_nulls_first_in_ort() -> Result<()> { let tests: Vec<(Expr, &str, bool)> = vec![ From 5da7ab300215c44ca5dc16771091890de22af99b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 20 Jul 2024 09:19:22 -0400 Subject: [PATCH 098/357] Minor: move `SessionStateDefaults` into its own module (#11566) * Minor: move `SessionStateDefaults` into its own module * Fix no default features --- .../core/src/datasource/listing/table.rs | 2 +- .../core/src/datasource/schema_adapter.rs | 1 + datafusion/core/src/execution/mod.rs | 3 + .../core/src/execution/session_state.rs | 185 +--------------- .../src/execution/session_state_defaults.rs | 202 ++++++++++++++++++ 5 files changed, 211 insertions(+), 182 deletions(-) create mode 100644 datafusion/core/src/execution/session_state_defaults.rs diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 1a7390d46f898..4d0a7738b0392 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1038,8 +1038,8 @@ mod tests { use crate::datasource::file_format::avro::AvroFormat; use crate::datasource::file_format::csv::CsvFormat; use crate::datasource::file_format::json::JsonFormat; - use crate::datasource::file_format::parquet::ParquetFormat; #[cfg(feature = "parquet")] + use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::{provider_as_source, MemTable}; use crate::execution::options::ArrowReadOptions; use crate::physical_plan::collect; diff --git a/datafusion/core/src/datasource/schema_adapter.rs b/datafusion/core/src/datasource/schema_adapter.rs index 715e2da5d9781..f485c49e91097 100644 --- a/datafusion/core/src/datasource/schema_adapter.rs +++ b/datafusion/core/src/datasource/schema_adapter.rs @@ -246,6 +246,7 @@ mod tests { use crate::datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; + #[cfg(feature = "parquet")] use parquet::arrow::ArrowWriter; use tempfile::TempDir; diff --git a/datafusion/core/src/execution/mod.rs b/datafusion/core/src/execution/mod.rs index ac02c73172567..a1b3eab25f33a 100644 --- a/datafusion/core/src/execution/mod.rs +++ b/datafusion/core/src/execution/mod.rs @@ -19,6 +19,9 @@ pub mod context; pub mod session_state; +mod session_state_defaults; + +pub use session_state_defaults::SessionStateDefaults; // backwards compatibility pub use crate::datasource::file_format::options; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 0824b249b7d15..59cc620dae4d0 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -18,29 +18,17 @@ //! 
[`SessionState`]: information required to run queries in a session use crate::catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA}; -use crate::catalog::listing_schema::ListingSchemaProvider; -use crate::catalog::schema::{MemorySchemaProvider, SchemaProvider}; -use crate::catalog::{ - CatalogProvider, CatalogProviderList, MemoryCatalogProvider, - MemoryCatalogProviderList, -}; +use crate::catalog::schema::SchemaProvider; +use crate::catalog::{CatalogProviderList, MemoryCatalogProviderList}; use crate::datasource::cte_worktable::CteWorkTable; -use crate::datasource::file_format::arrow::ArrowFormatFactory; -use crate::datasource::file_format::avro::AvroFormatFactory; -use crate::datasource::file_format::csv::CsvFormatFactory; -use crate::datasource::file_format::json::JsonFormatFactory; -#[cfg(feature = "parquet")] -use crate::datasource::file_format::parquet::ParquetFormatFactory; use crate::datasource::file_format::{format_as_file_type, FileFormatFactory}; use crate::datasource::function::{TableFunction, TableFunctionImpl}; -use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory}; +use crate::datasource::provider::TableProviderFactory; use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; -#[cfg(feature = "array_expressions")] -use crate::functions_array; +use crate::execution::SessionStateDefaults; use crate::physical_optimizer::optimizer::PhysicalOptimizer; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; -use crate::{functions, functions_aggregate}; use arrow_schema::{DataType, SchemaRef}; use async_trait::async_trait; use chrono::{DateTime, Utc}; @@ -54,7 +42,6 @@ use datafusion_common::{ ResolvedTableReference, TableReference, }; use datafusion_execution::config::SessionConfig; -use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_expr::execution_props::ExecutionProps; @@ -85,7 +72,6 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::sync::Arc; -use url::Url; use uuid::Uuid; /// Execution context for registering data sources and executing queries. @@ -1420,169 +1406,6 @@ impl From for SessionStateBuilder { } } -/// Defaults that are used as part of creating a SessionState such as table providers, -/// file formats, registering of builtin functions, etc. 
-pub struct SessionStateDefaults {} - -impl SessionStateDefaults { - /// returns a map of the default [`TableProviderFactory`]s - pub fn default_table_factories() -> HashMap> { - let mut table_factories: HashMap> = - HashMap::new(); - #[cfg(feature = "parquet")] - table_factories.insert("PARQUET".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("CSV".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("JSON".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("NDJSON".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("AVRO".into(), Arc::new(DefaultTableFactory::new())); - table_factories.insert("ARROW".into(), Arc::new(DefaultTableFactory::new())); - - table_factories - } - - /// returns the default MemoryCatalogProvider - pub fn default_catalog( - config: &SessionConfig, - table_factories: &HashMap>, - runtime: &Arc, - ) -> MemoryCatalogProvider { - let default_catalog = MemoryCatalogProvider::new(); - - default_catalog - .register_schema( - &config.options().catalog.default_schema, - Arc::new(MemorySchemaProvider::new()), - ) - .expect("memory catalog provider can register schema"); - - Self::register_default_schema(config, table_factories, runtime, &default_catalog); - - default_catalog - } - - /// returns the list of default [`ExprPlanner`]s - pub fn default_expr_planners() -> Vec> { - let expr_planners: Vec> = vec![ - Arc::new(functions::core::planner::CoreFunctionPlanner::default()), - // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::ArrayFunctionPlanner), - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::FieldAccessPlanner), - #[cfg(any( - feature = "datetime_expressions", - feature = "unicode_expressions" - ))] - Arc::new(functions::planner::UserDefinedFunctionPlanner), - ]; - - expr_planners - } - - /// returns the list of default [`ScalarUDF']'s - pub fn default_scalar_functions() -> Vec> { - let mut functions: Vec> = functions::all_default_functions(); - #[cfg(feature = "array_expressions")] - functions.append(&mut functions_array::all_default_array_functions()); - - functions - } - - /// returns the list of default [`AggregateUDF']'s - pub fn default_aggregate_functions() -> Vec> { - functions_aggregate::all_default_aggregate_functions() - } - - /// returns the list of default [`FileFormatFactory']'s - pub fn default_file_formats() -> Vec> { - let file_formats: Vec> = vec![ - #[cfg(feature = "parquet")] - Arc::new(ParquetFormatFactory::new()), - Arc::new(JsonFormatFactory::new()), - Arc::new(CsvFormatFactory::new()), - Arc::new(ArrowFormatFactory::new()), - Arc::new(AvroFormatFactory::new()), - ]; - - file_formats - } - - /// registers all builtin functions - scalar, array and aggregate - pub fn register_builtin_functions(state: &mut SessionState) { - Self::register_scalar_functions(state); - Self::register_array_functions(state); - Self::register_aggregate_functions(state); - } - - /// registers all the builtin scalar functions - pub fn register_scalar_functions(state: &mut SessionState) { - functions::register_all(state).expect("can not register built in functions"); - } - - /// registers all the builtin array functions - pub fn register_array_functions(state: &mut SessionState) { - // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - functions_array::register_all(state).expect("can not register array expressions"); - } - - /// registers all the 
builtin aggregate functions - pub fn register_aggregate_functions(state: &mut SessionState) { - functions_aggregate::register_all(state) - .expect("can not register aggregate functions"); - } - - /// registers the default schema - pub fn register_default_schema( - config: &SessionConfig, - table_factories: &HashMap>, - runtime: &Arc, - default_catalog: &MemoryCatalogProvider, - ) { - let url = config.options().catalog.location.as_ref(); - let format = config.options().catalog.format.as_ref(); - let (url, format) = match (url, format) { - (Some(url), Some(format)) => (url, format), - _ => return, - }; - let url = url.to_string(); - let format = format.to_string(); - - let url = Url::parse(url.as_str()).expect("Invalid default catalog location!"); - let authority = match url.host_str() { - Some(host) => format!("{}://{}", url.scheme(), host), - None => format!("{}://", url.scheme()), - }; - let path = &url.as_str()[authority.len()..]; - let path = object_store::path::Path::parse(path).expect("Can't parse path"); - let store = ObjectStoreUrl::parse(authority.as_str()) - .expect("Invalid default catalog url"); - let store = match runtime.object_store(store) { - Ok(store) => store, - _ => return, - }; - let factory = match table_factories.get(format.as_str()) { - Some(factory) => factory, - _ => return, - }; - let schema = - ListingSchemaProvider::new(authority, path, factory.clone(), store, format); - let _ = default_catalog - .register_schema("default", Arc::new(schema)) - .expect("Failed to register default schema"); - } - - /// registers the default [`FileFormatFactory`]s - pub fn register_default_file_formats(state: &mut SessionState) { - let formats = SessionStateDefaults::default_file_formats(); - for format in formats { - if let Err(e) = state.register_file_format(format, false) { - log::info!("Unable to register default file format: {e}") - }; - } - } -} - /// Adapter that implements the [`ContextProvider`] trait for a [`SessionState`] /// /// This is used so the SQL planner can access the state of the session without diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs new file mode 100644 index 0000000000000..0b0465e446054 --- /dev/null +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::catalog::listing_schema::ListingSchemaProvider; +use crate::catalog::{CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider}; +use crate::datasource::file_format::arrow::ArrowFormatFactory; +use crate::datasource::file_format::avro::AvroFormatFactory; +use crate::datasource::file_format::csv::CsvFormatFactory; +use crate::datasource::file_format::json::JsonFormatFactory; +#[cfg(feature = "parquet")] +use crate::datasource::file_format::parquet::ParquetFormatFactory; +use crate::datasource::file_format::FileFormatFactory; +use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory}; +use crate::execution::context::SessionState; +#[cfg(feature = "array_expressions")] +use crate::functions_array; +use crate::{functions, functions_aggregate}; +use datafusion_execution::config::SessionConfig; +use datafusion_execution::object_store::ObjectStoreUrl; +use datafusion_execution::runtime_env::RuntimeEnv; +use datafusion_expr::planner::ExprPlanner; +use datafusion_expr::{AggregateUDF, ScalarUDF}; +use std::collections::HashMap; +use std::sync::Arc; +use url::Url; + +/// Defaults that are used as part of creating a SessionState such as table providers, +/// file formats, registering of builtin functions, etc. +pub struct SessionStateDefaults {} + +impl SessionStateDefaults { + /// returns a map of the default [`TableProviderFactory`]s + pub fn default_table_factories() -> HashMap> { + let mut table_factories: HashMap> = + HashMap::new(); + #[cfg(feature = "parquet")] + table_factories.insert("PARQUET".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("CSV".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("JSON".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("NDJSON".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("AVRO".into(), Arc::new(DefaultTableFactory::new())); + table_factories.insert("ARROW".into(), Arc::new(DefaultTableFactory::new())); + + table_factories + } + + /// returns the default MemoryCatalogProvider + pub fn default_catalog( + config: &SessionConfig, + table_factories: &HashMap>, + runtime: &Arc, + ) -> MemoryCatalogProvider { + let default_catalog = MemoryCatalogProvider::new(); + + default_catalog + .register_schema( + &config.options().catalog.default_schema, + Arc::new(MemorySchemaProvider::new()), + ) + .expect("memory catalog provider can register schema"); + + Self::register_default_schema(config, table_factories, runtime, &default_catalog); + + default_catalog + } + + /// returns the list of default [`ExprPlanner`]s + pub fn default_expr_planners() -> Vec> { + let expr_planners: Vec> = vec![ + Arc::new(functions::core::planner::CoreFunctionPlanner::default()), + // register crate of array expressions (if enabled) + #[cfg(feature = "array_expressions")] + Arc::new(functions_array::planner::ArrayFunctionPlanner), + #[cfg(feature = "array_expressions")] + Arc::new(functions_array::planner::FieldAccessPlanner), + #[cfg(any( + feature = "datetime_expressions", + feature = "unicode_expressions" + ))] + Arc::new(functions::planner::UserDefinedFunctionPlanner), + ]; + + expr_planners + } + + /// returns the list of default [`ScalarUDF']'s + pub fn default_scalar_functions() -> Vec> { + let mut functions: Vec> = functions::all_default_functions(); + #[cfg(feature = "array_expressions")] + functions.append(&mut functions_array::all_default_array_functions()); + + functions + } + + /// returns the list of default [`AggregateUDF']'s + pub fn 
default_aggregate_functions() -> Vec> { + functions_aggregate::all_default_aggregate_functions() + } + + /// returns the list of default [`FileFormatFactory']'s + pub fn default_file_formats() -> Vec> { + let file_formats: Vec> = vec![ + #[cfg(feature = "parquet")] + Arc::new(ParquetFormatFactory::new()), + Arc::new(JsonFormatFactory::new()), + Arc::new(CsvFormatFactory::new()), + Arc::new(ArrowFormatFactory::new()), + Arc::new(AvroFormatFactory::new()), + ]; + + file_formats + } + + /// registers all builtin functions - scalar, array and aggregate + pub fn register_builtin_functions(state: &mut SessionState) { + Self::register_scalar_functions(state); + Self::register_array_functions(state); + Self::register_aggregate_functions(state); + } + + /// registers all the builtin scalar functions + pub fn register_scalar_functions(state: &mut SessionState) { + functions::register_all(state).expect("can not register built in functions"); + } + + /// registers all the builtin array functions + pub fn register_array_functions(state: &mut SessionState) { + // register crate of array expressions (if enabled) + #[cfg(feature = "array_expressions")] + functions_array::register_all(state).expect("can not register array expressions"); + } + + /// registers all the builtin aggregate functions + pub fn register_aggregate_functions(state: &mut SessionState) { + functions_aggregate::register_all(state) + .expect("can not register aggregate functions"); + } + + /// registers the default schema + pub fn register_default_schema( + config: &SessionConfig, + table_factories: &HashMap>, + runtime: &Arc, + default_catalog: &MemoryCatalogProvider, + ) { + let url = config.options().catalog.location.as_ref(); + let format = config.options().catalog.format.as_ref(); + let (url, format) = match (url, format) { + (Some(url), Some(format)) => (url, format), + _ => return, + }; + let url = url.to_string(); + let format = format.to_string(); + + let url = Url::parse(url.as_str()).expect("Invalid default catalog location!"); + let authority = match url.host_str() { + Some(host) => format!("{}://{}", url.scheme(), host), + None => format!("{}://", url.scheme()), + }; + let path = &url.as_str()[authority.len()..]; + let path = object_store::path::Path::parse(path).expect("Can't parse path"); + let store = ObjectStoreUrl::parse(authority.as_str()) + .expect("Invalid default catalog url"); + let store = match runtime.object_store(store) { + Ok(store) => store, + _ => return, + }; + let factory = match table_factories.get(format.as_str()) { + Some(factory) => factory, + _ => return, + }; + let schema = + ListingSchemaProvider::new(authority, path, factory.clone(), store, format); + let _ = default_catalog + .register_schema("default", Arc::new(schema)) + .expect("Failed to register default schema"); + } + + /// registers the default [`FileFormatFactory`]s + pub fn register_default_file_formats(state: &mut SessionState) { + let formats = SessionStateDefaults::default_file_formats(); + for format in formats { + if let Err(e) = state.register_file_format(format, false) { + log::info!("Unable to register default file format: {e}") + }; + } + } +} From 7df2bde8fc12554ad92b8941f7916069c1651f11 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Sun, 21 Jul 2024 05:31:26 -0700 Subject: [PATCH 099/357] fix: fixes trig function order by (#11559) * fix: remove assert * tests: add tests from ticket * tests: clean up table --- datafusion/common/src/scalar/mod.rs | 3 -- datafusion/sqllogictest/test_files/scalar.slt | 34 +++++++++++++++++++ 2 
files changed, 34 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 38f70e4c1466c..0651013901154 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1063,7 +1063,6 @@ impl ScalarValue { /// Create an one value in the given type. pub fn new_one(datatype: &DataType) -> Result { - assert!(datatype.is_primitive()); Ok(match datatype { DataType::Int8 => ScalarValue::Int8(Some(1)), DataType::Int16 => ScalarValue::Int16(Some(1)), @@ -1086,7 +1085,6 @@ impl ScalarValue { /// Create a negative one value in the given type. pub fn new_negative_one(datatype: &DataType) -> Result { - assert!(datatype.is_primitive()); Ok(match datatype { DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)), DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)), @@ -1104,7 +1102,6 @@ impl ScalarValue { } pub fn new_ten(datatype: &DataType) -> Result { - assert!(datatype.is_primitive()); Ok(match datatype { DataType::Int8 => ScalarValue::Int8(Some(10)), DataType::Int16 => ScalarValue::Int16(Some(10)), diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index dd19a1344139d..48f94fc080a4f 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1982,3 +1982,37 @@ query I select strpos('joséésoj', arrow_cast(null, 'Utf8')); ---- NULL + +statement ok +CREATE TABLE t1 (v1 int) AS VALUES (1), (2), (3); + +query I +SELECT * FROM t1 ORDER BY ACOS(SIN(v1)); +---- +2 +1 +3 + +query I +SELECT * FROM t1 ORDER BY ACOSH(SIN(v1)); +---- +1 +2 +3 + +query I +SELECT * FROM t1 ORDER BY ASIN(SIN(v1)); +---- +3 +1 +2 + +query I +SELECT * FROM t1 ORDER BY ATANH(SIN(v1)); +---- +3 +1 +2 + +statement ok +drop table t1; From d232065c3b710d0c8e035de49730238a30073eb2 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Sun, 21 Jul 2024 14:31:41 +0200 Subject: [PATCH 100/357] refactor: rewrite mega type to an enum containing both cases (#11539) --- .../file_format/write/orchestration.rs | 62 ++++++++++++++----- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index f788865b070f7..1d32063ee9f3f 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -42,6 +42,37 @@ use tokio::task::JoinSet; type WriterType = Box; type SerializerType = Arc; +/// Result of calling [`serialize_rb_stream_to_object_store`] +pub(crate) enum SerializedRecordBatchResult { + Success { + /// the writer + writer: WriterType, + + /// the number of rows successfully written + row_count: usize, + }, + Failure { + /// As explained in [`serialize_rb_stream_to_object_store`]: + /// - If an IO error occured that involved the ObjectStore writer, then the writer will not be returned to the caller + /// - Otherwise, the writer is returned to the caller + writer: Option, + + /// the actual error that occured + err: DataFusionError, + }, +} + +impl SerializedRecordBatchResult { + /// Create the success variant + pub fn success(writer: WriterType, row_count: usize) -> Self { + Self::Success { writer, row_count } + } + + pub fn failure(writer: Option, err: DataFusionError) -> Self { + Self::Failure { writer, err } + } +} + /// Serializes a single 
data stream in parallel and writes to an ObjectStore concurrently. /// Data order is preserved. /// @@ -55,7 +86,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( mut data_rx: Receiver, serializer: Arc, mut writer: WriterType, -) -> std::result::Result<(WriterType, u64), (Option, DataFusionError)> { +) -> SerializedRecordBatchResult { let (tx, mut rx) = mpsc::channel::>>(100); let serialize_task = SpawnedTask::spawn(async move { @@ -86,43 +117,43 @@ pub(crate) async fn serialize_rb_stream_to_object_store( match writer.write_all(&bytes).await { Ok(_) => (), Err(e) => { - return Err(( + return SerializedRecordBatchResult::failure( None, DataFusionError::Execution(format!( "Error writing to object store: {e}" )), - )) + ) } }; row_count += cnt; } Ok(Err(e)) => { // Return the writer along with the error - return Err((Some(writer), e)); + return SerializedRecordBatchResult::failure(Some(writer), e); } Err(e) => { // Handle task panic or cancellation - return Err(( + return SerializedRecordBatchResult::failure( Some(writer), DataFusionError::Execution(format!( "Serialization task panicked or was cancelled: {e}" )), - )); + ); } } } match serialize_task.join().await { Ok(Ok(_)) => (), - Ok(Err(e)) => return Err((Some(writer), e)), + Ok(Err(e)) => return SerializedRecordBatchResult::failure(Some(writer), e), Err(_) => { - return Err(( + return SerializedRecordBatchResult::failure( Some(writer), internal_datafusion_err!("Unknown error writing to object store"), - )) + ) } } - Ok((writer, row_count as u64)) + SerializedRecordBatchResult::success(writer, row_count) } type FileWriteBundle = (Receiver, SerializerType, WriterType); @@ -153,14 +184,17 @@ pub(crate) async fn stateless_serialize_and_write_files( while let Some(result) = join_set.join_next().await { match result { Ok(res) => match res { - Ok((writer, cnt)) => { + SerializedRecordBatchResult::Success { + writer, + row_count: cnt, + } => { finished_writers.push(writer); row_count += cnt; } - Err((writer, e)) => { + SerializedRecordBatchResult::Failure { writer, err } => { finished_writers.extend(writer); any_errors = true; - triggering_error = Some(e); + triggering_error = Some(err); } }, Err(e) => { @@ -193,7 +227,7 @@ pub(crate) async fn stateless_serialize_and_write_files( } } - tx.send(row_count).map_err(|_| { + tx.send(row_count as u64).map_err(|_| { internal_datafusion_err!( "Error encountered while sending row count back to file sink!" 
) From 36660fe10d9c0cdff62e0da0b94bee28422d3419 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Sun, 21 Jul 2024 18:03:27 +0530 Subject: [PATCH 101/357] Move `sql_compound_identifier_to_expr ` to `ExprPlanner` (#11487) * move get_field to expr planner * formatting * formatting * documentation * refactor * documentation & fix's * move optimizer tests to core * fix breaking tc's * cleanup * fix examples * formatting * rm datafusion-functions from optimizer * update compound identifier * update planner * update planner * formatting * reverting optimizer tests * formatting --- datafusion/expr/src/planner.rs | 19 +++++++++- datafusion/functions/src/core/mod.rs | 1 - datafusion/functions/src/core/planner.rs | 27 +++++++++++++- datafusion/sql/examples/sql.rs | 20 +++++++++- datafusion/sql/src/expr/identifier.rs | 45 +++++++++++------------ datafusion/sql/tests/cases/plan_to_sql.rs | 11 ++++-- datafusion/sql/tests/common/mod.rs | 11 ++++++ datafusion/sql/tests/sql_integration.rs | 5 ++- 8 files changed, 106 insertions(+), 33 deletions(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 415af1bf94dce..c775427df1384 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use arrow::datatypes::{DataType, SchemaRef}; +use arrow::datatypes::{DataType, Field, SchemaRef}; use datafusion_common::{ config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema, Result, TableReference, @@ -180,6 +180,23 @@ pub trait ExprPlanner: Send + Sync { fn plan_make_map(&self, args: Vec) -> Result>> { Ok(PlannerResult::Original(args)) } + + /// Plans compound identifier eg `db.schema.table` for non-empty nested names + /// + /// Note: + /// Currently compound identifier for outer query schema is not supported. + /// + /// Returns planned expression + fn plan_compound_identifier( + &self, + _field: &Field, + _qualifier: Option<&TableReference>, + _nested_names: &[String], + ) -> Result>> { + not_impl_err!( + "Default planner compound identifier hasn't been implemented for ExprPlanner" + ) + } } /// An operator with two arguments to plan diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index cbfaa592b012b..ee0309e593820 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -100,7 +100,6 @@ pub fn functions() -> Vec> { nvl2(), arrow_typeof(), named_struct(), - get_field(), coalesce(), map(), ] diff --git a/datafusion/functions/src/core/planner.rs b/datafusion/functions/src/core/planner.rs index 63eaa9874c2b9..889f191d592f5 100644 --- a/datafusion/functions/src/core/planner.rs +++ b/datafusion/functions/src/core/planner.rs @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use datafusion_common::DFSchema; +use arrow::datatypes::Field; use datafusion_common::Result; +use datafusion_common::{not_impl_err, Column, DFSchema, ScalarValue, TableReference}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawDictionaryExpr}; -use datafusion_expr::Expr; +use datafusion_expr::{lit, Expr}; use super::named_struct; @@ -62,4 +63,26 @@ impl ExprPlanner for CoreFunctionPlanner { ScalarFunction::new_udf(crate::string::overlay(), args), ))) } + + fn plan_compound_identifier( + &self, + field: &Field, + qualifier: Option<&TableReference>, + nested_names: &[String], + ) -> Result>> { + // TODO: remove when can support multiple nested identifiers + if nested_names.len() > 1 { + return not_impl_err!( + "Nested identifiers not yet supported for column {}", + Column::from((qualifier, field)).quoted_flat_name() + ); + } + let nested_name = nested_names[0].to_string(); + + let col = Expr::Column(Column::from((qualifier, field))); + let get_field_args = vec![col, lit(ScalarValue::from(nested_name))]; + Ok(PlannerResult::Planned(Expr::ScalarFunction( + ScalarFunction::new_udf(crate::core::get_field(), get_field_args), + ))) + } } diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index b724afabaf097..d9ee1b4db8e2d 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -15,13 +15,18 @@ // specific language governing permissions and limitations // under the License. +use std::{collections::HashMap, sync::Arc}; + use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{plan_err, Result}; +use datafusion_expr::planner::ExprPlanner; use datafusion_expr::WindowUDF; use datafusion_expr::{ logical_plan::builder::LogicalTableSource, AggregateUDF, ScalarUDF, TableSource, }; +use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_sql::{ @@ -29,7 +34,6 @@ use datafusion_sql::{ sqlparser::{dialect::GenericDialect, parser::Parser}, TableReference, }; -use std::{collections::HashMap, sync::Arc}; fn main() { let sql = "SELECT \ @@ -53,7 +57,8 @@ fn main() { // create a logical query plan let context_provider = MyContextProvider::new() .with_udaf(sum_udaf()) - .with_udaf(count_udaf()); + .with_udaf(count_udaf()) + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let sql_to_rel = SqlToRel::new(&context_provider); let plan = sql_to_rel.sql_statement_to_plan(statement.clone()).unwrap(); @@ -65,6 +70,7 @@ struct MyContextProvider { options: ConfigOptions, tables: HashMap>, udafs: HashMap>, + expr_planners: Vec>, } impl MyContextProvider { @@ -73,6 +79,11 @@ impl MyContextProvider { self } + fn with_expr_planner(mut self, planner: Arc) -> Self { + self.expr_planners.push(planner); + self + } + fn new() -> Self { let mut tables = HashMap::new(); tables.insert( @@ -105,6 +116,7 @@ impl MyContextProvider { tables, options: Default::default(), udafs: Default::default(), + expr_planners: vec![], } } } @@ -154,4 +166,8 @@ impl ContextProvider for MyContextProvider { fn udwf_names(&self) -> Vec { Vec::new() } + + fn get_expr_planners(&self) -> &[Arc] { + &self.expr_planners + } } diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 39736b1fbba59..f8979bde30867 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -15,14 
+15,17 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::Field; +use sqlparser::ast::{Expr as SQLExpr, Ident}; + use datafusion_common::{ internal_err, not_impl_err, plan_datafusion_err, Column, DFSchema, DataFusionError, - Result, ScalarValue, TableReference, + Result, TableReference, }; -use datafusion_expr::{expr::ScalarFunction, lit, Case, Expr}; -use sqlparser::ast::{Expr as SQLExpr, Ident}; +use datafusion_expr::planner::PlannerResult; +use datafusion_expr::{Case, Expr}; + +use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(super) fn sql_identifier_to_expr( @@ -125,26 +128,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match search_result { // found matching field with spare identifier(s) for nested field(s) in structure Some((field, qualifier, nested_names)) if !nested_names.is_empty() => { - // TODO: remove when can support multiple nested identifiers - if nested_names.len() > 1 { - return not_impl_err!( - "Nested identifiers not yet supported for column {}", - Column::from((qualifier, field)).quoted_flat_name() - ); - } - let nested_name = nested_names[0].to_string(); - - let col = Expr::Column(Column::from((qualifier, field))); - if let Some(udf) = - self.context_provider.get_function_meta("get_field") - { - Ok(Expr::ScalarFunction(ScalarFunction::new_udf( - udf, - vec![col, lit(ScalarValue::from(nested_name))], - ))) - } else { - internal_err!("get_field not found") + // found matching field with spare identifier(s) for nested field(s) in structure + for planner in self.context_provider.get_expr_planners() { + if let Ok(planner_result) = planner.plan_compound_identifier( + field, + qualifier, + nested_names, + ) { + match planner_result { + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(_args) => {} + } + } } + not_impl_err!( + "Compound identifiers not supported by ExprPlanner: {ids:?}" + ) } // found matching field with no spare identifier(s) Some((field, qualifier, _nested_names)) => { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index ed79a1dfc0c74..e9c4114353c07 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; use std::vec; use arrow_schema::*; @@ -28,6 +29,7 @@ use datafusion_sql::unparser::dialect::{ }; use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser}; +use datafusion_functions::core::planner::CoreFunctionPlanner; use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; use sqlparser::parser::Parser; @@ -155,7 +157,8 @@ fn roundtrip_statement() -> Result<()> { let context = MockContextProvider::default() .with_udaf(sum_udaf()) - .with_udaf(count_udaf()); + .with_udaf(count_udaf()) + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -184,7 +187,8 @@ fn roundtrip_crossjoin() -> Result<()> { .try_with_sql(query)? 
.parse_statement()?; - let context = MockContextProvider::default(); + let context = MockContextProvider::default() + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -276,7 +280,8 @@ fn roundtrip_statement_with_dialect() -> Result<()> { .try_with_sql(query.sql)? .parse_statement()?; - let context = MockContextProvider::default(); + let context = MockContextProvider::default() + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel .sql_statement_to_plan(statement) diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index bcfb8f43848e4..374aa9db6714a 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -25,6 +25,7 @@ use arrow_schema::*; use datafusion_common::config::ConfigOptions; use datafusion_common::file_options::file_type::FileType; use datafusion_common::{plan_err, GetExt, Result, TableReference}; +use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::ContextProvider; @@ -53,6 +54,7 @@ pub(crate) struct MockContextProvider { options: ConfigOptions, udfs: HashMap>, udafs: HashMap>, + expr_planners: Vec>, } impl MockContextProvider { @@ -73,6 +75,11 @@ impl MockContextProvider { self.udafs.insert(udaf.name().to_lowercase(), udaf); self } + + pub(crate) fn with_expr_planner(mut self, planner: Arc) -> Self { + self.expr_planners.push(planner); + self + } } impl ContextProvider for MockContextProvider { @@ -240,6 +247,10 @@ impl ContextProvider for MockContextProvider { fn udwf_names(&self) -> Vec { Vec::new() } + + fn get_expr_planners(&self) -> &[Arc] { + &self.expr_planners + } } struct EmptyTable { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 57dab81331b3f..3291560383dfd 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -18,6 +18,7 @@ use std::any::Any; #[cfg(test)] use std::collections::HashMap; +use std::sync::Arc; use std::vec; use arrow_schema::TimeUnit::Nanosecond; @@ -37,6 +38,7 @@ use datafusion_sql::{ planner::{ParserOptions, SqlToRel}, }; +use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::{ approx_median::approx_median_udaf, count::count_udaf, }; @@ -2694,7 +2696,8 @@ fn logical_plan_with_dialect_and_options( .with_udaf(approx_median_udaf()) .with_udaf(count_udaf()) .with_udaf(avg_udaf()) - .with_udaf(grouping_udaf()); + .with_udaf(grouping_udaf()) + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let planner = SqlToRel::new_with_options(&context, options); let result = DFParser::parse_sql_with_dialect(sql, dialect); From 2587df09c3fd9659f5076cedf98046e258764b2e Mon Sep 17 00:00:00 2001 From: Chris Connelly Date: Mon, 22 Jul 2024 00:42:03 +0100 Subject: [PATCH 102/357] Support `newlines_in_values` CSV option (#11533) * feat!: support `newlines_in_values` CSV option This significantly simplifies the UX when dealing with large CSV files that must support newlines in (quoted) values. By default, large CSV files will be repartitioned into multiple parallel range scans. 
This is great for performance in the common case but when large CSVs contain newlines in values the parallel scan will fail due to splitting on newlines within quotes rather than actual line terminators. With the current implementation, this behaviour can be controlled by the session-level `datafusion.optimizer.repartition_file_scans` and `datafusion.optimizer.repartition_file_min_size` settings. This commit introduces a `newlines_in_values` option to `CsvOptions` and plumbs it through to `CsvExec`, which includes it in the test for whether parallel execution is supported. This provides a convenient and searchable way to disable file scan repartitioning on a per-CSV basis. BREAKING CHANGE: This adds new public fields to types with all public fields, which is a breaking change. * docs: normalise `newlines_in_values` documentation * test: add/fix sqllogictests for `newlines_in_values` * docs: document `datafusion.catalog.newlines_in_values` * fix: typo in config.md * chore: suppress lint on too many arguments for `CsvExec::new` * fix: always checkout `*.slt` with LF line endings This is a bit of a stab in the dark, but it might fix multiline tests on Windows. * fix: always checkout `newlines_in_values.csv` with `LF` line endings The default git behaviour of converting line endings for checked out files causes the `csv_files.slt` test to fail when testing `newlines_in_values`. This appears to be due to the quoted newlines being converted to CRLF, which are not then normalised when the CSV is read. Assuming that the sqllogictests do normalise line endings in the expected output, this could then lead to a "spurious" diff from the actual output. --------- Co-authored-by: Andrew Lamb --- .gitattributes | 1 + datafusion/common/src/config.rs | 30 +++++++++++ .../core/src/datasource/file_format/csv.rs | 50 +++++++++++++++++++ .../src/datasource/file_format/options.rs | 22 ++++++++ .../core/src/datasource/physical_plan/csv.rs | 27 ++++++++-- .../enforce_distribution.rs | 3 ++ .../physical_optimizer/projection_pushdown.rs | 3 ++ .../replace_with_order_preserving_variants.rs | 1 + datafusion/core/src/test/mod.rs | 3 ++ .../core/tests/data/newlines_in_values.csv | 13 +++++ .../proto/datafusion_common.proto | 1 + datafusion/proto-common/src/from_proto/mod.rs | 1 + .../proto-common/src/generated/pbjson.rs | 21 ++++++++ .../proto-common/src/generated/prost.rs | 3 ++ datafusion/proto-common/src/to_proto/mod.rs | 3 ++ datafusion/proto/proto/datafusion.proto | 1 + .../src/generated/datafusion_proto_common.rs | 3 ++ datafusion/proto/src/generated/pbjson.rs | 18 +++++++ datafusion/proto/src/generated/prost.rs | 2 + datafusion/proto/src/physical_plan/mod.rs | 2 + .../sqllogictest/test_files/csv_files.slt | 42 ++++++++++++++++ .../test_files/information_schema.slt | 2 + docs/source/user-guide/configs.md | 1 + 23 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 datafusion/core/tests/data/newlines_in_values.csv diff --git a/.gitattributes b/.gitattributes index bcdeffc09a113..84b47a6fc56e1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ .github/ export-ignore +datafusion/core/tests/data/newlines_in_values.csv text eol=lf datafusion/proto/src/generated/prost.rs linguist-generated datafusion/proto/src/generated/pbjson.rs linguist-generated diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index b46b002baac02..3cbe14cb558eb 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -184,6 +184,16 @@ config_namespace! 
{ /// Default value for `format.has_header` for `CREATE EXTERNAL TABLE` /// if not specified explicitly in the statement. pub has_header: bool, default = false + + /// Specifies whether newlines in (quoted) CSV values are supported. + /// + /// This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` + /// if not specified explicitly in the statement. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + pub newlines_in_values: bool, default = false } } @@ -1593,6 +1603,14 @@ config_namespace! { pub quote: u8, default = b'"' pub escape: Option, default = None pub double_quote: Option, default = None + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. + pub newlines_in_values: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED pub schema_infer_max_rec: usize, default = 100 pub date_format: Option, default = None @@ -1665,6 +1683,18 @@ impl CsvOptions { self } + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. + pub fn with_newlines_in_values(mut self, newlines_in_values: bool) -> Self { + self.newlines_in_values = Some(newlines_in_values); + self + } + /// Set a `CompressionTypeVariant` of CSV /// - defaults to `CompressionTypeVariant::UNCOMPRESSED` pub fn with_file_compression_type( diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 5daa8447551b1..185f50883b2ce 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -233,6 +233,18 @@ impl CsvFormat { self } + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. 
+ pub fn with_newlines_in_values(mut self, newlines_in_values: bool) -> Self { + self.options.newlines_in_values = Some(newlines_in_values); + self + } + /// Set a `FileCompressionType` of CSV /// - defaults to `FileCompressionType::UNCOMPRESSED` pub fn with_file_compression_type( @@ -330,6 +342,9 @@ impl FileFormat for CsvFormat { self.options.quote, self.options.escape, self.options.comment, + self.options + .newlines_in_values + .unwrap_or(state.config_options().catalog.newlines_in_values), self.options.compression.into(), ); Ok(Arc::new(exec)) @@ -1052,6 +1067,41 @@ mod tests { Ok(()) } + #[rstest(n_partitions, case(1), case(2), case(3), case(4))] + #[tokio::test] + async fn test_csv_parallel_newlines_in_values(n_partitions: usize) -> Result<()> { + let config = SessionConfig::new() + .with_repartition_file_scans(true) + .with_repartition_file_min_size(0) + .with_target_partitions(n_partitions); + let csv_options = CsvReadOptions::default() + .has_header(true) + .newlines_in_values(true); + let ctx = SessionContext::new_with_config(config); + let testdata = arrow_test_data(); + ctx.register_csv( + "aggr", + &format!("{testdata}/csv/aggregate_test_100.csv"), + csv_options, + ) + .await?; + + let query = "select sum(c3) from aggr;"; + let query_result = ctx.sql(query).await?.collect().await?; + let actual_partitions = count_query_csv_partitions(&ctx, query).await?; + + #[rustfmt::skip] + let expected = ["+--------------+", + "| sum(aggr.c3) |", + "+--------------+", + "| 781 |", + "+--------------+"]; + assert_batches_eq!(expected, &query_result); + assert_eq!(1, actual_partitions); // csv won't be scanned in parallel when newlines_in_values is set + + Ok(()) + } + /// Read a single empty csv file in parallel /// /// empty_0_byte.csv: diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index c6d143ed6749a..552977baba17b 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -63,6 +63,14 @@ pub struct CsvReadOptions<'a> { pub escape: Option, /// If enabled, lines beginning with this byte are ignored. pub comment: Option, + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. + pub newlines_in_values: bool, /// An optional schema representing the CSV files. If None, CSV reader will try to infer it /// based on data in file. pub schema: Option<&'a Schema>, @@ -95,6 +103,7 @@ impl<'a> CsvReadOptions<'a> { delimiter: b',', quote: b'"', escape: None, + newlines_in_values: false, file_extension: DEFAULT_CSV_EXTENSION, table_partition_cols: vec![], file_compression_type: FileCompressionType::UNCOMPRESSED, @@ -133,6 +142,18 @@ impl<'a> CsvReadOptions<'a> { self } + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. 
+ pub fn newlines_in_values(mut self, newlines_in_values: bool) -> Self { + self.newlines_in_values = newlines_in_values; + self + } + /// Specify the file extension for CSV file selection pub fn file_extension(mut self, file_extension: &'a str) -> Self { self.file_extension = file_extension; @@ -490,6 +511,7 @@ impl ReadOptions<'_> for CsvReadOptions<'_> { .with_delimiter(self.delimiter) .with_quote(self.quote) .with_escape(self.escape) + .with_newlines_in_values(self.newlines_in_values) .with_schema_infer_max_rec(self.schema_infer_max_records) .with_file_compression_type(self.file_compression_type.to_owned()); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 327fbd976e877..fb0e23c6c1648 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -59,6 +59,7 @@ pub struct CsvExec { quote: u8, escape: Option, comment: Option, + newlines_in_values: bool, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Compression type of the file associated with CsvExec @@ -68,6 +69,7 @@ pub struct CsvExec { impl CsvExec { /// Create a new CSV reader execution plan provided base and specific configurations + #[allow(clippy::too_many_arguments)] pub fn new( base_config: FileScanConfig, has_header: bool, @@ -75,6 +77,7 @@ impl CsvExec { quote: u8, escape: Option, comment: Option, + newlines_in_values: bool, file_compression_type: FileCompressionType, ) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = @@ -91,6 +94,7 @@ impl CsvExec { delimiter, quote, escape, + newlines_in_values, metrics: ExecutionPlanMetricsSet::new(), file_compression_type, cache, @@ -126,6 +130,17 @@ impl CsvExec { self.escape } + /// Specifies whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default behaviour depends on the `datafusion.catalog.newlines_in_values` setting. + pub fn newlines_in_values(&self) -> bool { + self.newlines_in_values + } + fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } @@ -196,15 +211,15 @@ impl ExecutionPlan for CsvExec { /// Redistribute files across partitions according to their size /// See comments on [`FileGroupPartitioner`] for more detail. /// - /// Return `None` if can't get repartitioned(empty/compressed file). + /// Return `None` if can't get repartitioned (empty, compressed file, or `newlines_in_values` set). fn repartitioned( &self, target_partitions: usize, config: &ConfigOptions, ) -> Result>> { let repartition_file_min_size = config.optimizer.repartition_file_min_size; - // Parallel execution on compressed CSV file is not supported yet. - if self.file_compression_type.is_compressed() { + // Parallel execution on compressed CSV files or files that must support newlines in values is not supported yet. 
+ if self.file_compression_type.is_compressed() || self.newlines_in_values { return Ok(None); } @@ -589,6 +604,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); assert_eq!(13, csv.base_config.file_schema.fields().len()); @@ -658,6 +674,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); assert_eq!(13, csv.base_config.file_schema.fields().len()); @@ -727,6 +744,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); assert_eq!(13, csv.base_config.file_schema.fields().len()); @@ -793,6 +811,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); assert_eq!(14, csv.base_config.file_schema.fields().len()); @@ -858,6 +877,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); assert_eq!(13, csv.base_config.file_schema.fields().len()); @@ -953,6 +973,7 @@ mod tests { b'"', None, None, + false, file_compression_type.to_owned(), ); diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index afed5dd375351..9791f23f963e0 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1472,6 +1472,7 @@ pub(crate) mod tests { b'"', None, None, + false, FileCompressionType::UNCOMPRESSED, )) } @@ -1496,6 +1497,7 @@ pub(crate) mod tests { b'"', None, None, + false, FileCompressionType::UNCOMPRESSED, )) } @@ -3770,6 +3772,7 @@ pub(crate) mod tests { b'"', None, None, + false, compression_type, )), vec![("a".to_string(), "a".to_string())], diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 84f898431762b..d0d0c985b8b64 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -186,6 +186,7 @@ fn try_swapping_with_csv( csv.quote(), csv.escape(), csv.comment(), + csv.newlines_in_values(), csv.file_compression_type, )) as _ }) @@ -1700,6 +1701,7 @@ mod tests { 0, None, None, + false, FileCompressionType::UNCOMPRESSED, )) } @@ -1723,6 +1725,7 @@ mod tests { 0, None, None, + false, FileCompressionType::UNCOMPRESSED, )) } diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index 013155b8400a6..6565e3e7d0d23 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -1503,6 +1503,7 @@ mod tests { b'"', None, None, + false, FileCompressionType::UNCOMPRESSED, )) } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index e8550a79cb0e0..5cb1b6ea7017b 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -99,6 +99,7 @@ pub fn scan_partitioned_csv(partitions: usize, work_dir: &Path) -> Result for CsvOptions { quote: proto_opts.quote[0], escape: proto_opts.escape.first().copied(), double_quote: proto_opts.has_header.first().map(|h| *h != 0), + newlines_in_values: proto_opts.newlines_in_values.first().map(|h| *h != 0), compression: proto_opts.compression().into(), schema_infer_max_rec: proto_opts.schema_infer_max_rec as usize, date_format: (!proto_opts.date_format.is_empty()) diff --git a/datafusion/proto-common/src/generated/pbjson.rs 
b/datafusion/proto-common/src/generated/pbjson.rs index be3cc58b23dfe..4b34660ae2ef5 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -1884,6 +1884,9 @@ impl serde::Serialize for CsvOptions { if !self.double_quote.is_empty() { len += 1; } + if !self.newlines_in_values.is_empty() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion_common.CsvOptions", len)?; if !self.has_header.is_empty() { #[allow(clippy::needless_borrow)] @@ -1936,6 +1939,10 @@ impl serde::Serialize for CsvOptions { #[allow(clippy::needless_borrow)] struct_ser.serialize_field("doubleQuote", pbjson::private::base64::encode(&self.double_quote).as_str())?; } + if !self.newlines_in_values.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("newlinesInValues", pbjson::private::base64::encode(&self.newlines_in_values).as_str())?; + } struct_ser.end() } } @@ -1969,6 +1976,8 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { "comment", "double_quote", "doubleQuote", + "newlines_in_values", + "newlinesInValues", ]; #[allow(clippy::enum_variant_names)] @@ -1987,6 +1996,7 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { NullValue, Comment, DoubleQuote, + NewlinesInValues, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -2022,6 +2032,7 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { "nullValue" | "null_value" => Ok(GeneratedField::NullValue), "comment" => Ok(GeneratedField::Comment), "doubleQuote" | "double_quote" => Ok(GeneratedField::DoubleQuote), + "newlinesInValues" | "newlines_in_values" => Ok(GeneratedField::NewlinesInValues), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -2055,6 +2066,7 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { let mut null_value__ = None; let mut comment__ = None; let mut double_quote__ = None; + let mut newlines_in_values__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::HasHeader => { @@ -2155,6 +2167,14 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) ; } + GeneratedField::NewlinesInValues => { + if newlines_in_values__.is_some() { + return Err(serde::de::Error::duplicate_field("newlinesInValues")); + } + newlines_in_values__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; + } } } Ok(CsvOptions { @@ -2172,6 +2192,7 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { null_value: null_value__.unwrap_or_default(), comment: comment__.unwrap_or_default(), double_quote: double_quote__.unwrap_or_default(), + newlines_in_values: newlines_in_values__.unwrap_or_default(), }) } } diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index b0674ff28d754..9a2770997f15e 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -633,6 +633,9 @@ pub struct CsvOptions { /// Indicates if quotes are doubled #[prost(bytes = "vec", tag = "14")] pub double_quote: ::prost::alloc::vec::Vec, + /// Indicates if newlines are supported in values + #[prost(bytes = "vec", tag = "15")] + pub newlines_in_values: ::prost::alloc::vec::Vec, } /// Options controlling CSV format #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 705a479e01787..9dcb65444a470 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -900,6 +900,9 @@ impl TryFrom<&CsvOptions> for protobuf::CsvOptions { quote: vec![opts.quote], escape: opts.escape.map_or_else(Vec::new, |e| vec![e]), double_quote: opts.double_quote.map_or_else(Vec::new, |h| vec![h as u8]), + newlines_in_values: opts + .newlines_in_values + .map_or_else(Vec::new, |h| vec![h as u8]), compression: compression.into(), schema_infer_max_rec: opts.schema_infer_max_rec as u64, date_format: opts.date_format.clone().unwrap_or_default(), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index dc551778c5fb2..49d9f2dde67f0 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1007,6 +1007,7 @@ message CsvScanExecNode { oneof optional_comment { string comment = 6; } + bool newlines_in_values = 7; } message AvroScanExecNode { diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index b0674ff28d754..9a2770997f15e 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -633,6 +633,9 @@ pub struct CsvOptions { /// Indicates if quotes are doubled #[prost(bytes = "vec", tag = "14")] pub double_quote: ::prost::alloc::vec::Vec, + /// Indicates if newlines are supported in values + #[prost(bytes = "vec", tag = "15")] + pub newlines_in_values: ::prost::alloc::vec::Vec, } /// Options controlling CSV format #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 8f77c24bd9117..25f6646d2a9af 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -3605,6 +3605,9 @@ impl serde::Serialize for CsvScanExecNode { if !self.quote.is_empty() { len += 1; } + if self.newlines_in_values { + len += 
1; + } if self.optional_escape.is_some() { len += 1; } @@ -3624,6 +3627,9 @@ impl serde::Serialize for CsvScanExecNode { if !self.quote.is_empty() { struct_ser.serialize_field("quote", &self.quote)?; } + if self.newlines_in_values { + struct_ser.serialize_field("newlinesInValues", &self.newlines_in_values)?; + } if let Some(v) = self.optional_escape.as_ref() { match v { csv_scan_exec_node::OptionalEscape::Escape(v) => { @@ -3654,6 +3660,8 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { "hasHeader", "delimiter", "quote", + "newlines_in_values", + "newlinesInValues", "escape", "comment", ]; @@ -3664,6 +3672,7 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { HasHeader, Delimiter, Quote, + NewlinesInValues, Escape, Comment, } @@ -3691,6 +3700,7 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { "hasHeader" | "has_header" => Ok(GeneratedField::HasHeader), "delimiter" => Ok(GeneratedField::Delimiter), "quote" => Ok(GeneratedField::Quote), + "newlinesInValues" | "newlines_in_values" => Ok(GeneratedField::NewlinesInValues), "escape" => Ok(GeneratedField::Escape), "comment" => Ok(GeneratedField::Comment), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -3716,6 +3726,7 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { let mut has_header__ = None; let mut delimiter__ = None; let mut quote__ = None; + let mut newlines_in_values__ = None; let mut optional_escape__ = None; let mut optional_comment__ = None; while let Some(k) = map_.next_key()? { @@ -3744,6 +3755,12 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { } quote__ = Some(map_.next_value()?); } + GeneratedField::NewlinesInValues => { + if newlines_in_values__.is_some() { + return Err(serde::de::Error::duplicate_field("newlinesInValues")); + } + newlines_in_values__ = Some(map_.next_value()?); + } GeneratedField::Escape => { if optional_escape__.is_some() { return Err(serde::de::Error::duplicate_field("escape")); @@ -3763,6 +3780,7 @@ impl<'de> serde::Deserialize<'de> for CsvScanExecNode { has_header: has_header__.unwrap_or_default(), delimiter: delimiter__.unwrap_or_default(), quote: quote__.unwrap_or_default(), + newlines_in_values: newlines_in_values__.unwrap_or_default(), optional_escape: optional_escape__, optional_comment: optional_comment__, }) diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 605c56fa946a3..ba288fe3d1b87 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1542,6 +1542,8 @@ pub struct CsvScanExecNode { pub delimiter: ::prost::alloc::string::String, #[prost(string, tag = "4")] pub quote: ::prost::alloc::string::String, + #[prost(bool, tag = "7")] + pub newlines_in_values: bool, #[prost(oneof = "csv_scan_exec_node::OptionalEscape", tags = "5")] pub optional_escape: ::core::option::Option, #[prost(oneof = "csv_scan_exec_node::OptionalComment", tags = "6")] diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 1220f42ded836..9e17c19ecbc5d 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -211,6 +211,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { } else { None }, + scan.newlines_in_values, FileCompressionType::UNCOMPRESSED, ))), #[cfg(feature = "parquet")] @@ -1579,6 +1580,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { } else { None }, + newlines_in_values: exec.newlines_in_values(), }, )), }); diff --git 
a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index ca3bebe79f279..f7f5aa54dd0d5 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -293,3 +293,45 @@ id0 "value0" id1 "value1" id2 "value2" id3 "value3" + +# Handling of newlines in values + +statement ok +SET datafusion.optimizer.repartition_file_min_size = 1; + +statement ok +CREATE EXTERNAL TABLE stored_table_with_newlines_in_values_unsafe ( +col1 TEXT, +col2 TEXT +) STORED AS CSV +LOCATION '../core/tests/data/newlines_in_values.csv'; + +statement error incorrect number of fields +select * from stored_table_with_newlines_in_values_unsafe; + +statement ok +CREATE EXTERNAL TABLE stored_table_with_newlines_in_values_safe ( +col1 TEXT, +col2 TEXT +) STORED AS CSV +LOCATION '../core/tests/data/newlines_in_values.csv' +OPTIONS ('format.newlines_in_values' 'true'); + +query TT +select * from stored_table_with_newlines_in_values_safe; +---- +id message +1 +01)hello +02)world +2 +01)something +02)else +3 +01) +02)many +03)lines +04)make +05)good test +4 unquoted +value end diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index f7b755b019118..c8c0d1d45b974 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -168,6 +168,7 @@ datafusion.catalog.format NULL datafusion.catalog.has_header false datafusion.catalog.information_schema true datafusion.catalog.location NULL +datafusion.catalog.newlines_in_values false datafusion.execution.aggregate.scalar_update_factor 10 datafusion.execution.batch_size 8192 datafusion.execution.coalesce_batches true @@ -252,6 +253,7 @@ datafusion.catalog.format NULL Type of `TableProvider` to use when loading `defa datafusion.catalog.has_header false Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. datafusion.catalog.information_schema true Should DataFusion provide access to `information_schema` virtual tables for displaying schema information datafusion.catalog.location NULL Location scanned to load tables for `default` schema +datafusion.catalog.newlines_in_values false Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance. datafusion.execution.aggregate.scalar_update_factor 10 Specifies the threshold for using `ScalarValue`s to update accumulators during high-cardinality aggregations for each input batch. The aggregation is considered high-cardinality if the number of affected groups is greater than or equal to `batch_size / scalar_update_factor`. In such cases, `ScalarValue`s are utilized for updating accumulators, rather than the default batch-slice approach. This can lead to performance improvements. By adjusting the `scalar_update_factor`, you can balance the trade-off between more efficient accumulator updates and the number of groups affected. 
datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 8d3ecbc985447..5e5de016e375e 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -44,6 +44,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.catalog.location | NULL | Location scanned to load tables for `default` schema | | datafusion.catalog.format | NULL | Type of `TableProvider` to use when loading `default` schema | | datafusion.catalog.has_header | false | Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. | +| datafusion.catalog.newlines_in_values | false | Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance. | | datafusion.execution.batch_size | 8192 | Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption | | datafusion.execution.coalesce_batches | true | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. 
The target batch size is determined by the configuration setting | | datafusion.execution.collect_statistics | false | Should DataFusion collect statistics after listing files | From 63efaee2555ddd1381b4885867860621ec791f82 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Sun, 21 Jul 2024 17:09:54 -0700 Subject: [PATCH 103/357] Support SortMergeJoin spilling (#11218) * Support SortMerge spilling --- datafusion/core/tests/memory_limit/mod.rs | 27 +- datafusion/execution/src/memory_pool/mod.rs | 19 +- .../src/joins/sort_merge_join.rs | 457 +++++++++++++++--- datafusion/physical-plan/src/sorts/sort.rs | 7 +- datafusion/physical-plan/src/spill.rs | 103 +++- 5 files changed, 529 insertions(+), 84 deletions(-) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index f4f4f8cd89cb1..bc2c3315da592 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -164,7 +164,7 @@ async fn cross_join() { } #[tokio::test] -async fn merge_join() { +async fn sort_merge_join_no_spill() { // Planner chooses MergeJoin only if number of partitions > 1 let config = SessionConfig::new() .with_target_partitions(2) @@ -175,11 +175,32 @@ async fn merge_join() { "select t1.* from t t1 JOIN t t2 ON t1.pod = t2.pod AND t1.time = t2.time", ) .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", + "Failed to allocate additional", "SMJStream", + "Disk spilling disabled", ]) .with_memory_limit(1_000) .with_config(config) + .with_scenario(Scenario::AccessLogStreaming) + .run() + .await +} + +#[tokio::test] +async fn sort_merge_join_spill() { + // Planner chooses MergeJoin only if number of partitions > 1 + let config = SessionConfig::new() + .with_target_partitions(2) + .set_bool("datafusion.optimizer.prefer_hash_join", false); + + TestCase::new() + .with_query( + "select t1.* from t t1 JOIN t t2 ON t1.pod = t2.pod AND t1.time = t2.time", + ) + .with_memory_limit(1_000) + .with_config(config) + .with_disk_manager_config(DiskManagerConfig::NewOs) + .with_scenario(Scenario::AccessLogStreaming) .run() .await } @@ -453,7 +474,7 @@ impl TestCase { let table = scenario.table(); let rt_config = RuntimeConfig::new() - // do not allow spilling + // disk manager setting controls the spilling .with_disk_manager(disk_manager_config) .with_memory_limit(memory_limit, MEMORY_FRACTION); diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index 3f66a304dc18c..92ed1b2918de0 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -18,7 +18,7 @@ //! [`MemoryPool`] for memory management during query execution, [`proxy]` for //! help with allocation accounting. 
-use datafusion_common::Result; +use datafusion_common::{internal_err, Result}; use std::{cmp::Ordering, sync::Arc}; mod pool; @@ -220,6 +220,23 @@ impl MemoryReservation { self.size = new_size } + /// Tries to free `capacity` bytes from this reservation + /// if `capacity` does not exceed [`Self::size`] + /// Returns new reservation size + /// or error if shrinking capacity is more than allocated size + pub fn try_shrink(&mut self, capacity: usize) -> Result { + if let Some(new_size) = self.size.checked_sub(capacity) { + self.registration.pool.shrink(self, capacity); + self.size = new_size; + Ok(new_size) + } else { + internal_err!( + "Cannot free the capacity {capacity} out of allocated size {}", + self.size + ) + } + } + /// Sets the size of this reservation to `capacity` pub fn resize(&mut self, capacity: usize) { match capacity.cmp(&self.size) { diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index a03e4a83fd2d8..5fde028c7f488 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -24,40 +24,46 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::VecDeque; use std::fmt::Formatter; +use std::fs::File; +use std::io::BufReader; use std::mem; use std::ops::Range; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::expressions::PhysicalSortExpr; -use crate::joins::utils::{ - build_join_schema, check_join_is_valid, estimate_join_statistics, - symmetric_join_output_partitioning, JoinFilter, JoinOn, JoinOnRef, -}; -use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use crate::{ - execution_mode_from_children, metrics, DisplayAs, DisplayFormatType, Distribution, - ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, - RecordBatchStream, SendableRecordBatchStream, Statistics, -}; - use arrow::array::*; use arrow::compute::{self, concat_batches, take, SortOptions}; use arrow::datatypes::{DataType, SchemaRef, TimeUnit}; use arrow::error::ArrowError; +use arrow::ipc::reader::FileReader; use arrow_array::types::UInt64Type; +use futures::{Stream, StreamExt}; +use hashbrown::HashSet; use datafusion_common::{ - internal_err, not_impl_err, plan_err, DataFusionError, JoinSide, JoinType, Result, + exec_err, internal_err, not_impl_err, plan_err, DataFusionError, JoinSide, JoinType, + Result, }; +use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; +use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; -use futures::{Stream, StreamExt}; -use hashbrown::HashSet; +use crate::expressions::PhysicalSortExpr; +use crate::joins::utils::{ + build_join_schema, check_join_is_valid, estimate_join_statistics, + symmetric_join_output_partitioning, JoinFilter, JoinOn, JoinOnRef, +}; +use crate::metrics::{Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; +use crate::spill::spill_record_batches; +use crate::{ + execution_mode_from_children, metrics, DisplayAs, DisplayFormatType, Distribution, + ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, +}; /// join execution plan executes partitions in parallel and combines them into a set of /// 
partitions. @@ -234,11 +240,6 @@ impl SortMergeJoinExec { impl DisplayAs for SortMergeJoinExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { - let display_filter = self.filter.as_ref().map_or_else( - || "".to_string(), - |f| format!(", filter={}", f.expression()), - ); - match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { let on = self @@ -250,7 +251,12 @@ impl DisplayAs for SortMergeJoinExec { write!( f, "SortMergeJoin: join_type={:?}, on=[{}]{}", - self.join_type, on, display_filter + self.join_type, + on, + self.filter.as_ref().map_or("".to_string(), |f| format!( + ", filter={}", + f.expression() + )) ) } } @@ -375,6 +381,7 @@ impl ExecutionPlan for SortMergeJoinExec { batch_size, SortMergeJoinMetrics::new(partition, &self.metrics), reservation, + context.runtime_env(), )?)) } @@ -412,6 +419,12 @@ struct SortMergeJoinMetrics { /// Peak memory used for buffered data. /// Calculated as sum of peak memory values across partitions peak_mem_used: metrics::Gauge, + /// count of spills during the execution of the operator + spill_count: Count, + /// total spilled bytes during the execution of the operator + spilled_bytes: Count, + /// total spilled rows during the execution of the operator + spilled_rows: Count, } impl SortMergeJoinMetrics { @@ -425,6 +438,9 @@ impl SortMergeJoinMetrics { MetricBuilder::new(metrics).counter("output_batches", partition); let output_rows = MetricBuilder::new(metrics).output_rows(partition); let peak_mem_used = MetricBuilder::new(metrics).gauge("peak_mem_used", partition); + let spill_count = MetricBuilder::new(metrics).spill_count(partition); + let spilled_bytes = MetricBuilder::new(metrics).spilled_bytes(partition); + let spilled_rows = MetricBuilder::new(metrics).spilled_rows(partition); Self { join_time, @@ -433,6 +449,9 @@ impl SortMergeJoinMetrics { output_batches, output_rows, peak_mem_used, + spill_count, + spilled_bytes, + spilled_rows, } } } @@ -565,7 +584,8 @@ impl StreamedBatch { #[derive(Debug)] struct BufferedBatch { /// The buffered record batch - pub batch: RecordBatch, + /// None if the batch spilled to disk th + pub batch: Option, /// The range in which the rows share the same join key pub range: Range, /// Array refs of the join key @@ -577,6 +597,14 @@ struct BufferedBatch { /// The indices of buffered batch that failed the join filter. /// When dequeuing the buffered batch, we need to produce null joined rows for these indices. pub join_filter_failed_idxs: HashSet, + /// Current buffered batch number of rows. 
Equal to batch.num_rows() + /// but if batch is spilled to disk this property is preferable + /// and less expensive + pub num_rows: usize, + /// An optional temp spill file name on the disk if the batch spilled + /// None by default + /// Some(fileName) if the batch spilled to the disk + pub spill_file: Option, } impl BufferedBatch { @@ -602,13 +630,16 @@ impl BufferedBatch { + mem::size_of::>() + mem::size_of::(); + let num_rows = batch.num_rows(); BufferedBatch { - batch, + batch: Some(batch), range, join_arrays, null_joined: vec![], size_estimation, join_filter_failed_idxs: HashSet::new(), + num_rows, + spill_file: None, } } } @@ -666,6 +697,8 @@ struct SMJStream { pub join_metrics: SortMergeJoinMetrics, /// Memory reservation pub reservation: MemoryReservation, + /// Runtime env + pub runtime_env: Arc, } impl RecordBatchStream for SMJStream { @@ -785,6 +818,7 @@ impl SMJStream { batch_size: usize, join_metrics: SortMergeJoinMetrics, reservation: MemoryReservation, + runtime_env: Arc, ) -> Result { let streamed_schema = streamed.schema(); let buffered_schema = buffered.schema(); @@ -813,6 +847,7 @@ impl SMJStream { join_type, join_metrics, reservation, + runtime_env, }) } @@ -858,6 +893,58 @@ impl SMJStream { } } + fn free_reservation(&mut self, buffered_batch: BufferedBatch) -> Result<()> { + // Shrink memory usage for in-memory batches only + if buffered_batch.spill_file.is_none() && buffered_batch.batch.is_some() { + self.reservation + .try_shrink(buffered_batch.size_estimation)?; + } + + Ok(()) + } + + fn allocate_reservation(&mut self, mut buffered_batch: BufferedBatch) -> Result<()> { + match self.reservation.try_grow(buffered_batch.size_estimation) { + Ok(_) => { + self.join_metrics + .peak_mem_used + .set_max(self.reservation.size()); + Ok(()) + } + Err(_) if self.runtime_env.disk_manager.tmp_files_enabled() => { + // spill buffered batch to disk + let spill_file = self + .runtime_env + .disk_manager + .create_tmp_file("sort_merge_join_buffered_spill")?; + + if let Some(batch) = buffered_batch.batch { + spill_record_batches( + vec![batch], + spill_file.path().into(), + Arc::clone(&self.buffered_schema), + )?; + buffered_batch.spill_file = Some(spill_file); + buffered_batch.batch = None; + + // update metrics to register spill + self.join_metrics.spill_count.add(1); + self.join_metrics + .spilled_bytes + .add(buffered_batch.size_estimation); + self.join_metrics.spilled_rows.add(buffered_batch.num_rows); + Ok(()) + } else { + internal_err!("Buffered batch has empty body") + } + } + Err(e) => exec_err!("{}. Disk spilling disabled.", e.message()), + }?; + + self.buffered_data.batches.push_back(buffered_batch); + Ok(()) + } + /// Poll next buffered batches fn poll_buffered_batches(&mut self, cx: &mut Context) -> Poll>> { loop { @@ -867,12 +954,12 @@ impl SMJStream { while !self.buffered_data.batches.is_empty() { let head_batch = self.buffered_data.head_batch(); // If the head batch is fully processed, dequeue it and produce output of it. - if head_batch.range.end == head_batch.batch.num_rows() { + if head_batch.range.end == head_batch.num_rows { self.freeze_dequeuing_buffered()?; if let Some(buffered_batch) = self.buffered_data.batches.pop_front() { - self.reservation.shrink(buffered_batch.size_estimation); + self.free_reservation(buffered_batch)?; } } else { // If the head batch is not fully processed, break the loop. 
@@ -900,25 +987,22 @@ impl SMJStream { Poll::Ready(Some(batch)) => { self.join_metrics.input_batches.add(1); self.join_metrics.input_rows.add(batch.num_rows()); + if batch.num_rows() > 0 { let buffered_batch = BufferedBatch::new(batch, 0..1, &self.on_buffered); - self.reservation.try_grow(buffered_batch.size_estimation)?; - self.join_metrics - .peak_mem_used - .set_max(self.reservation.size()); - self.buffered_data.batches.push_back(buffered_batch); + self.allocate_reservation(buffered_batch)?; self.buffered_state = BufferedState::PollingRest; } } }, BufferedState::PollingRest => { if self.buffered_data.tail_batch().range.end - < self.buffered_data.tail_batch().batch.num_rows() + < self.buffered_data.tail_batch().num_rows { while self.buffered_data.tail_batch().range.end - < self.buffered_data.tail_batch().batch.num_rows() + < self.buffered_data.tail_batch().num_rows { if is_join_arrays_equal( &self.buffered_data.head_batch().join_arrays, @@ -941,6 +1025,7 @@ impl SMJStream { self.buffered_state = BufferedState::Ready; } Poll::Ready(Some(batch)) => { + // Polling batches coming concurrently as multiple partitions self.join_metrics.input_batches.add(1); self.join_metrics.input_rows.add(batch.num_rows()); if batch.num_rows() > 0 { @@ -949,12 +1034,7 @@ impl SMJStream { 0..0, &self.on_buffered, ); - self.reservation - .try_grow(buffered_batch.size_estimation)?; - self.join_metrics - .peak_mem_used - .set_max(self.reservation.size()); - self.buffered_data.batches.push_back(buffered_batch); + self.allocate_reservation(buffered_batch)?; } } } @@ -1473,13 +1553,8 @@ fn produce_buffered_null_batch( } // Take buffered (right) columns - let buffered_columns = buffered_batch - .batch - .columns() - .iter() - .map(|column| take(column, &buffered_indices, None)) - .collect::, ArrowError>>() - .map_err(Into::::into)?; + let buffered_columns = + get_buffered_columns_from_batch(buffered_batch, buffered_indices)?; // Create null streamed (left) columns let mut streamed_columns = streamed_schema @@ -1502,13 +1577,45 @@ fn get_buffered_columns( buffered_data: &BufferedData, buffered_batch_idx: usize, buffered_indices: &UInt64Array, -) -> Result, ArrowError> { - buffered_data.batches[buffered_batch_idx] - .batch - .columns() - .iter() - .map(|column| take(column, &buffered_indices, None)) - .collect::, ArrowError>>() +) -> Result> { + get_buffered_columns_from_batch( + &buffered_data.batches[buffered_batch_idx], + buffered_indices, + ) +} + +#[inline(always)] +fn get_buffered_columns_from_batch( + buffered_batch: &BufferedBatch, + buffered_indices: &UInt64Array, +) -> Result> { + match (&buffered_batch.spill_file, &buffered_batch.batch) { + // In memory batch + (None, Some(batch)) => Ok(batch + .columns() + .iter() + .map(|column| take(column, &buffered_indices, None)) + .collect::, ArrowError>>() + .map_err(Into::::into)?), + // If the batch was spilled to disk, less likely + (Some(spill_file), None) => { + let mut buffered_cols: Vec = + Vec::with_capacity(buffered_indices.len()); + + let file = BufReader::new(File::open(spill_file.path())?); + let reader = FileReader::try_new(file, None)?; + + for batch in reader { + batch?.columns().iter().for_each(|column| { + buffered_cols.extend(take(column, &buffered_indices, None)) + }); + } + + Ok(buffered_cols) + } + // Invalid combination + (spill, batch) => internal_err!("Unexpected buffered batch spill status. Spill exists: {}. 
In-memory exists: {}", spill.is_some(), batch.is_some()), + } } /// Calculate join filter bit mask considering join type specifics @@ -1854,6 +1961,7 @@ mod tests { assert_batches_eq, assert_batches_sorted_eq, assert_contains, JoinType, Result, }; use datafusion_execution::config::SessionConfig; + use datafusion_execution::disk_manager::DiskManagerConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_execution::TaskContext; @@ -2749,7 +2857,7 @@ mod tests { } #[tokio::test] - async fn overallocation_single_batch() -> Result<()> { + async fn overallocation_single_batch_no_spill() -> Result<()> { let left = build_table( ("a1", &vec![0, 1, 2, 3, 4, 5]), ("b1", &vec![1, 2, 3, 4, 5, 6]), @@ -2775,14 +2883,17 @@ mod tests { JoinType::LeftAnti, ]; - for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); - let session_config = SessionConfig::default().with_batch_size(50); + // Disable DiskManager to prevent spilling + let runtime_config = RuntimeConfig::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled); + let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let session_config = SessionConfig::default().with_batch_size(50); + for join_type in join_types { let task_ctx = TaskContext::default() - .with_session_config(session_config) - .with_runtime(runtime); + .with_session_config(session_config.clone()) + .with_runtime(Arc::clone(&runtime)); let task_ctx = Arc::new(task_ctx); let join = join_with_options( @@ -2797,18 +2908,20 @@ mod tests { let stream = join.execute(0, task_ctx)?; let err = common::collect(stream).await.unwrap_err(); - assert_contains!( - err.to_string(), - "Resources exhausted: Failed to allocate additional" - ); + assert_contains!(err.to_string(), "Failed to allocate additional"); assert_contains!(err.to_string(), "SMJStream[0]"); + assert_contains!(err.to_string(), "Disk spilling disabled"); + assert!(join.metrics().is_some()); + assert_eq!(join.metrics().unwrap().spill_count(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_bytes(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_rows(), Some(0)); } Ok(()) } #[tokio::test] - async fn overallocation_multi_batch() -> Result<()> { + async fn overallocation_multi_batch_no_spill() -> Result<()> { let left_batch_1 = build_table_i32( ("a1", &vec![0, 1]), ("b1", &vec![1, 1]), @@ -2855,13 +2968,17 @@ mod tests { JoinType::LeftAnti, ]; + // Disable DiskManager to prevent spilling + let runtime_config = RuntimeConfig::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled); + let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let session_config = SessionConfig::default().with_batch_size(50); + for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); - let session_config = SessionConfig::default().with_batch_size(50); let task_ctx = TaskContext::default() - .with_session_config(session_config) - .with_runtime(runtime); + .with_session_config(session_config.clone()) + .with_runtime(Arc::clone(&runtime)); let task_ctx = Arc::new(task_ctx); let join = join_with_options( Arc::clone(&left), @@ -2875,11 +2992,205 @@ mod tests { let stream = join.execute(0, task_ctx)?; let err = common::collect(stream).await.unwrap_err(); - assert_contains!( - err.to_string(), - "Resources exhausted: Failed to allocate 
additional" - ); + assert_contains!(err.to_string(), "Failed to allocate additional"); assert_contains!(err.to_string(), "SMJStream[0]"); + assert_contains!(err.to_string(), "Disk spilling disabled"); + assert!(join.metrics().is_some()); + assert_eq!(join.metrics().unwrap().spill_count(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_bytes(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_rows(), Some(0)); + } + + Ok(()) + } + + #[tokio::test] + async fn overallocation_single_batch_spill() -> Result<()> { + let left = build_table( + ("a1", &vec![0, 1, 2, 3, 4, 5]), + ("b1", &vec![1, 2, 3, 4, 5, 6]), + ("c1", &vec![4, 5, 6, 7, 8, 9]), + ); + let right = build_table( + ("a2", &vec![0, 10, 20, 30, 40]), + ("b2", &vec![1, 3, 4, 6, 8]), + ("c2", &vec![50, 60, 70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b2", &right.schema())?) as _, + )]; + let sort_options = vec![SortOptions::default(); on.len()]; + + let join_types = [ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + ]; + + // Enable DiskManager to allow spilling + let runtime_config = RuntimeConfig::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs); + let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + + for batch_size in [1, 50] { + let session_config = SessionConfig::default().with_batch_size(batch_size); + + for join_type in &join_types { + let task_ctx = TaskContext::default() + .with_session_config(session_config.clone()) + .with_runtime(Arc::clone(&runtime)); + let task_ctx = Arc::new(task_ctx); + + let join = join_with_options( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + *join_type, + sort_options.clone(), + false, + )?; + + let stream = join.execute(0, task_ctx)?; + let spilled_join_result = common::collect(stream).await.unwrap(); + + assert!(join.metrics().is_some()); + assert!(join.metrics().unwrap().spill_count().unwrap() > 0); + assert!(join.metrics().unwrap().spilled_bytes().unwrap() > 0); + assert!(join.metrics().unwrap().spilled_rows().unwrap() > 0); + + // Run the test with no spill configuration as + let task_ctx_no_spill = + TaskContext::default().with_session_config(session_config.clone()); + let task_ctx_no_spill = Arc::new(task_ctx_no_spill); + + let join = join_with_options( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + *join_type, + sort_options.clone(), + false, + )?; + let stream = join.execute(0, task_ctx_no_spill)?; + let no_spilled_join_result = common::collect(stream).await.unwrap(); + + assert!(join.metrics().is_some()); + assert_eq!(join.metrics().unwrap().spill_count(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_bytes(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_rows(), Some(0)); + // Compare spilled and non spilled data to check spill logic doesn't corrupt the data + assert_eq!(spilled_join_result, no_spilled_join_result); + } + } + + Ok(()) + } + + #[tokio::test] + async fn overallocation_multi_batch_spill() -> Result<()> { + let left_batch_1 = build_table_i32( + ("a1", &vec![0, 1]), + ("b1", &vec![1, 1]), + ("c1", &vec![4, 5]), + ); + let left_batch_2 = build_table_i32( + ("a1", &vec![2, 3]), + ("b1", &vec![1, 1]), + ("c1", &vec![6, 7]), + ); + let left_batch_3 = build_table_i32( + ("a1", &vec![4, 5]), + ("b1", &vec![1, 1]), + ("c1", &vec![8, 9]), + ); + let right_batch_1 = build_table_i32( + ("a2", &vec![0, 10]), + ("b2", &vec![1, 1]), 
+ ("c2", &vec![50, 60]), + ); + let right_batch_2 = build_table_i32( + ("a2", &vec![20, 30]), + ("b2", &vec![1, 1]), + ("c2", &vec![70, 80]), + ); + let right_batch_3 = + build_table_i32(("a2", &vec![40]), ("b2", &vec![1]), ("c2", &vec![90])); + let left = + build_table_from_batches(vec![left_batch_1, left_batch_2, left_batch_3]); + let right = + build_table_from_batches(vec![right_batch_1, right_batch_2, right_batch_3]); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b2", &right.schema())?) as _, + )]; + let sort_options = vec![SortOptions::default(); on.len()]; + + let join_types = [ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + ]; + + // Enable DiskManager to allow spilling + let runtime_config = RuntimeConfig::new() + .with_memory_limit(500, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs); + let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + for batch_size in [1, 50] { + let session_config = SessionConfig::default().with_batch_size(batch_size); + + for join_type in &join_types { + let task_ctx = TaskContext::default() + .with_session_config(session_config.clone()) + .with_runtime(Arc::clone(&runtime)); + let task_ctx = Arc::new(task_ctx); + let join = join_with_options( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + *join_type, + sort_options.clone(), + false, + )?; + + let stream = join.execute(0, task_ctx)?; + let spilled_join_result = common::collect(stream).await.unwrap(); + assert!(join.metrics().is_some()); + assert!(join.metrics().unwrap().spill_count().unwrap() > 0); + assert!(join.metrics().unwrap().spilled_bytes().unwrap() > 0); + assert!(join.metrics().unwrap().spilled_rows().unwrap() > 0); + + // Run the test with no spill configuration as + let task_ctx_no_spill = + TaskContext::default().with_session_config(session_config.clone()); + let task_ctx_no_spill = Arc::new(task_ctx_no_spill); + + let join = join_with_options( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + *join_type, + sort_options.clone(), + false, + )?; + let stream = join.execute(0, task_ctx_no_spill)?; + let no_spilled_join_result = common::collect(stream).await.unwrap(); + + assert!(join.metrics().is_some()); + assert_eq!(join.metrics().unwrap().spill_count(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_bytes(), Some(0)); + assert_eq!(join.metrics().unwrap().spilled_rows(), Some(0)); + // Compare spilled and non spilled data to check spill logic doesn't corrupt the data + assert_eq!(spilled_join_result, no_spilled_join_result); + } } Ok(()) diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index d576f77d9f742..13ff63c174055 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -45,7 +45,7 @@ use arrow::record_batch::RecordBatch; use arrow::row::{RowConverter, SortField}; use arrow_array::{Array, RecordBatchOptions, UInt32Array}; use arrow_schema::DataType; -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; @@ -333,10 +333,7 @@ impl ExternalSorter { for spill in self.spills.drain(..) 
{ if !spill.path().exists() { - return Err(DataFusionError::Internal(format!( - "Spill file {:?} does not exist", - spill.path() - ))); + return internal_err!("Spill file {:?} does not exist", spill.path()); } let stream = read_spill_as_stream(spill, Arc::clone(&self.schema), 2)?; streams.push(stream); diff --git a/datafusion/physical-plan/src/spill.rs b/datafusion/physical-plan/src/spill.rs index 0018a27bd22bb..21ca58fa0a9fa 100644 --- a/datafusion/physical-plan/src/spill.rs +++ b/datafusion/physical-plan/src/spill.rs @@ -40,7 +40,7 @@ use crate::stream::RecordBatchReceiverStream; /// `path` - temp file /// `schema` - batches schema, should be the same across batches /// `buffer` - internal buffer of capacity batches -pub fn read_spill_as_stream( +pub(crate) fn read_spill_as_stream( path: RefCountedTempFile, schema: SchemaRef, buffer: usize, @@ -56,7 +56,7 @@ pub fn read_spill_as_stream( /// Spills in-memory `batches` to disk. /// /// Returns total number of the rows spilled to disk. -pub fn spill_record_batches( +pub(crate) fn spill_record_batches( batches: Vec, path: PathBuf, schema: SchemaRef, @@ -85,3 +85,102 @@ fn read_spill(sender: Sender>, path: &Path) -> Result<()> { } Ok(()) } + +/// Spill the `RecordBatch` to disk as smaller batches +/// split by `batch_size_rows` +/// Return `total_rows` what is spilled +pub fn spill_record_batch_by_size( + batch: &RecordBatch, + path: PathBuf, + schema: SchemaRef, + batch_size_rows: usize, +) -> Result<()> { + let mut offset = 0; + let total_rows = batch.num_rows(); + let mut writer = IPCWriter::new(&path, schema.as_ref())?; + + while offset < total_rows { + let length = std::cmp::min(total_rows - offset, batch_size_rows); + let batch = batch.slice(offset, length); + offset += batch.num_rows(); + writer.write(&batch)?; + } + writer.finish()?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use crate::spill::{spill_record_batch_by_size, spill_record_batches}; + use crate::test::build_table_i32; + use datafusion_common::Result; + use datafusion_execution::disk_manager::DiskManagerConfig; + use datafusion_execution::DiskManager; + use std::fs::File; + use std::io::BufReader; + use std::sync::Arc; + + #[test] + fn test_batch_spill_and_read() -> Result<()> { + let batch1 = build_table_i32( + ("a2", &vec![0, 1, 2]), + ("b2", &vec![3, 4, 5]), + ("c2", &vec![4, 5, 6]), + ); + + let batch2 = build_table_i32( + ("a2", &vec![10, 11, 12]), + ("b2", &vec![13, 14, 15]), + ("c2", &vec![14, 15, 16]), + ); + + let disk_manager = DiskManager::try_new(DiskManagerConfig::NewOs)?; + + let spill_file = disk_manager.create_tmp_file("Test Spill")?; + let schema = batch1.schema(); + let num_rows = batch1.num_rows() + batch2.num_rows(); + let cnt = spill_record_batches( + vec![batch1, batch2], + spill_file.path().into(), + Arc::clone(&schema), + ); + assert_eq!(cnt.unwrap(), num_rows); + + let file = BufReader::new(File::open(spill_file.path())?); + let reader = arrow::ipc::reader::FileReader::try_new(file, None)?; + + assert_eq!(reader.num_batches(), 2); + assert_eq!(reader.schema(), schema); + + Ok(()) + } + + #[test] + fn test_batch_spill_by_size() -> Result<()> { + let batch1 = build_table_i32( + ("a2", &vec![0, 1, 2, 3]), + ("b2", &vec![3, 4, 5, 6]), + ("c2", &vec![4, 5, 6, 7]), + ); + + let disk_manager = DiskManager::try_new(DiskManagerConfig::NewOs)?; + + let spill_file = disk_manager.create_tmp_file("Test Spill")?; + let schema = batch1.schema(); + spill_record_batch_by_size( + &batch1, + spill_file.path().into(), + Arc::clone(&schema), + 1, + )?; + + let file = 
BufReader::new(File::open(spill_file.path())?); + let reader = arrow::ipc::reader::FileReader::try_new(file, None)?; + + assert_eq!(reader.num_batches(), 4); + assert_eq!(reader.schema(), schema); + + Ok(()) + } +} From ecf5323eaa38869ed2f911b02f98e17aa6db639a Mon Sep 17 00:00:00 2001 From: yfu Date: Mon, 22 Jul 2024 21:04:36 +1000 Subject: [PATCH 104/357] Fix unparser invalid sql for query with order (#11527) * wip * fix wrong unparsed query for original query that has derived table with limit/sort/distinct; fix wrong unparsed query for original query with sort column that is not in select * clippy * addressed the comments, also fix one issue when selected column is aliased - see test --- datafusion/sql/src/unparser/plan.rs | 67 +++++++++------- datafusion/sql/src/unparser/rewrite.rs | 80 +++++++++++++++++- datafusion/sql/tests/cases/plan_to_sql.rs | 98 +++++++++++++++++++++++ 3 files changed, 215 insertions(+), 30 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 7f050d8a0690e..59660f4f0404f 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -29,6 +29,7 @@ use super::{ SelectBuilder, TableRelationBuilder, TableWithJoinsBuilder, }, rewrite::normalize_union_schema, + rewrite::rewrite_plan_for_sort_on_non_projected_fields, utils::{find_agg_node_within_select, unproject_window_exprs, AggVariant}, Unparser, }; @@ -199,33 +200,21 @@ impl Unparser<'_> { Ok(()) } - fn projection_to_sql( - &self, - plan: &LogicalPlan, - p: &Projection, - query: &mut Option, - select: &mut SelectBuilder, - relation: &mut RelationBuilder, - ) -> Result<()> { - // A second projection implies a derived tablefactor - if !select.already_projected() { - self.reconstruct_select_statement(plan, p, select)?; - self.select_to_sql_recursively(p.input.as_ref(), query, select, relation) - } else { - let mut derived_builder = DerivedRelationBuilder::default(); - derived_builder.lateral(false).alias(None).subquery({ - let inner_statement = self.plan_to_sql(plan)?; - if let ast::Statement::Query(inner_query) = inner_statement { - inner_query - } else { - return internal_err!( - "Subquery must be a Query, but found {inner_statement:?}" - ); - } - }); - relation.derived(derived_builder); - Ok(()) - } + fn derive(&self, plan: &LogicalPlan, relation: &mut RelationBuilder) -> Result<()> { + let mut derived_builder = DerivedRelationBuilder::default(); + derived_builder.lateral(false).alias(None).subquery({ + let inner_statement = self.plan_to_sql(plan)?; + if let ast::Statement::Query(inner_query) = inner_statement { + inner_query + } else { + return internal_err!( + "Subquery must be a Query, but found {inner_statement:?}" + ); + } + }); + relation.derived(derived_builder); + + Ok(()) } fn select_to_sql_recursively( @@ -256,7 +245,17 @@ impl Unparser<'_> { Ok(()) } LogicalPlan::Projection(p) => { - self.projection_to_sql(plan, p, query, select, relation) + if let Some(new_plan) = rewrite_plan_for_sort_on_non_projected_fields(p) { + return self + .select_to_sql_recursively(&new_plan, query, select, relation); + } + + // Projection can be top-level plan for derived table + if select.already_projected() { + return self.derive(plan, relation); + } + self.reconstruct_select_statement(plan, p, select)?; + self.select_to_sql_recursively(p.input.as_ref(), query, select, relation) } LogicalPlan::Filter(filter) => { if let Some(AggVariant::Aggregate(agg)) = @@ -278,6 +277,10 @@ impl Unparser<'_> { ) } LogicalPlan::Limit(limit) => { + // Limit can 
be top-level plan for derived table + if select.already_projected() { + return self.derive(plan, relation); + } if let Some(fetch) = limit.fetch { let Some(query) = query.as_mut() else { return internal_err!( @@ -298,6 +301,10 @@ impl Unparser<'_> { ) } LogicalPlan::Sort(sort) => { + // Sort can be top-level plan for derived table + if select.already_projected() { + return self.derive(plan, relation); + } if let Some(query_ref) = query { query_ref.order_by(self.sort_to_sql(sort.expr.clone())?); } else { @@ -323,6 +330,10 @@ impl Unparser<'_> { ) } LogicalPlan::Distinct(distinct) => { + // Distinct can be top-level plan for derived table + if select.already_projected() { + return self.derive(plan, relation); + } let (select_distinct, input) = match distinct { Distinct::All(input) => (ast::Distinct::Distinct, input.as_ref()), Distinct::On(on) => { diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index a73fce30ced3c..fba95ad48f32f 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; use datafusion_common::{ tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeIterator}, Result, }; -use datafusion_expr::{Expr, LogicalPlan, Sort}; +use datafusion_expr::{Expr, LogicalPlan, Projection, Sort}; /// Normalize the schema of a union plan to remove qualifiers from the schema fields and sort expressions. /// @@ -99,3 +102,76 @@ fn rewrite_sort_expr_for_union(exprs: Vec) -> Result> { Ok(sort_exprs) } + +// Rewrite logic plan for query that order by columns are not in projections +// Plan before rewrite: +// +// Projection: j1.j1_string, j2.j2_string +// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST +// Projection: j1.j1_string, j2.j2_string, j1.j1_id, j2.j2_id +// Inner Join: Filter: j1.j1_id = j2.j2_id +// TableScan: j1 +// TableScan: j2 +// +// Plan after rewrite +// +// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST +// Projection: j1.j1_string, j2.j2_string +// Inner Join: Filter: j1.j1_id = j2.j2_id +// TableScan: j1 +// TableScan: j2 +// +// This prevents the original plan generate query with derived table but missing alias. 
+pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( + p: &Projection, +) -> Option { + let LogicalPlan::Sort(sort) = p.input.as_ref() else { + return None; + }; + + let LogicalPlan::Projection(inner_p) = sort.input.as_ref() else { + return None; + }; + + let mut map = HashMap::new(); + let inner_exprs = inner_p + .expr + .iter() + .map(|f| { + if let Expr::Alias(alias) = f { + let a = Expr::Column(alias.name.clone().into()); + map.insert(a.clone(), f.clone()); + a + } else { + f.clone() + } + }) + .collect::>(); + + let mut collects = p.expr.clone(); + for expr in &sort.expr { + if let Expr::Sort(s) = expr { + collects.push(s.expr.as_ref().clone()); + } + } + + if collects.iter().collect::>() + == inner_exprs.iter().collect::>() + { + let mut sort = sort.clone(); + let mut inner_p = inner_p.clone(); + + let new_exprs = p + .expr + .iter() + .map(|e| map.get(e).unwrap_or(e).clone()) + .collect::>(); + + inner_p.expr.clone_from(&new_exprs); + sort.input = Arc::new(LogicalPlan::Projection(inner_p)); + + Some(LogicalPlan::Sort(sort)) + } else { + None + } +} diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index e9c4114353c07..aada560fd884a 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -244,6 +244,50 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, + // Test query with derived tables that put distinct,sort,limit on the wrong level + TestStatementWithDialect { + sql: "SELECT j1_string from j1 order by j1_id", + expected: r#"SELECT j1.j1_string FROM j1 ORDER BY j1.j1_id ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT j1_string AS a from j1 order by j1_id", + expected: r#"SELECT j1.j1_string AS a FROM j1 ORDER BY j1.j1_id ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT j1_string from j1 join j2 on j1.j1_id = j2.j2_id order by j1_id", + expected: r#"SELECT j1.j1_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: " + SELECT + j1_string, + j2_string + FROM + ( + SELECT + distinct j1_id, + j1_string, + j2_string + from + j1 + INNER join j2 ON j1.j1_id = j2.j2_id + order by + j1.j1_id desc + limit + 10 + ) abc + ORDER BY + abc.j2_string", + expected: r#"SELECT abc.j1_string, abc.j2_string FROM (SELECT DISTINCT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, // more tests around subquery/derived table roundtrip TestStatementWithDialect { sql: "SELECT string_count FROM ( @@ -261,6 +305,60 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, + TestStatementWithDialect { + sql: " + SELECT + j1_string, + j2_string + FROM + ( + SELECT + j1_id, + j1_string, + j2_string + from + j1 + INNER join j2 ON j1.j1_id = j2.j2_id + group by + 
j1_id, + j1_string, + j2_string + order by + j1.j1_id desc + limit + 10 + ) abc + ORDER BY + abc.j2_string", + expected: r#"SELECT abc.j1_string, abc.j2_string FROM (SELECT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id, j1.j1_string, j2.j2_string ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + // Test query that order by columns are not in select columns + TestStatementWithDialect { + sql: " + SELECT + j1_string + FROM + ( + SELECT + j1_string, + j2_string + from + j1 + INNER join j2 ON j1.j1_id = j2.j2_id + order by + j1.j1_id desc, + j2.j2_id desc + limit + 10 + ) abc + ORDER BY + j2_string", + expected: r#"SELECT abc.j1_string FROM (SELECT j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, TestStatementWithDialect { sql: "SELECT id FROM (SELECT j1_id from j1) AS c (id)", expected: r#"SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)"#, From 12c0a1e2e21a750e2672bf3109e244836a12b399 Mon Sep 17 00:00:00 2001 From: Kaviraj Kanagaraj Date: Mon, 22 Jul 2024 14:27:07 +0200 Subject: [PATCH 105/357] chore: Minor cleanup `simplify_demo()` example (#11576) * chore: fix examples and comments Signed-off-by: Kaviraj * remove unused `b` field Signed-off-by: Kaviraj * fix the number of days Signed-off-by: Kaviraj --------- Signed-off-by: Kaviraj --- datafusion-examples/examples/expr_api.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index a5cf7011f8113..a48171c625a81 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -177,16 +177,12 @@ fn simplify_demo() -> Result<()> { ); // here are some other examples of what DataFusion is capable of - let schema = Schema::new(vec![ - make_field("i", DataType::Int64), - make_field("b", DataType::Boolean), - ]) - .to_dfschema_ref()?; + let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?; let context = SimplifyContext::new(&props).with_schema(schema.clone()); let simplifier = ExprSimplifier::new(context); // basic arithmetic simplification - // i + 1 + 2 => a + 3 + // i + 1 + 2 => i + 3 // (note this is not done if the expr is (col("i") + (lit(1) + lit(2)))) assert_eq!( simplifier.simplify(col("i") + (lit(1) + lit(2)))?, @@ -209,7 +205,7 @@ fn simplify_demo() -> Result<()> { ); // String --> Date simplification - // `cast('2020-09-01' as date)` --> 18500 + // `cast('2020-09-01' as date)` --> 18506 # number of days since epoch 1970-01-01 assert_eq!( simplifier.simplify(lit("2020-09-01").cast_to(&DataType::Date32, &schema)?)?, lit(ScalarValue::Date32(Some(18506))) From f9457de779e213f610fc92dd9165076c7ee770a2 Mon Sep 17 00:00:00 2001 From: Devesh Rahatekar <79015420+devesh-2002@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:01:52 +0530 Subject: [PATCH 106/357] Move Datafusion Query Optimizer to library user guide (#11563) * Added Datafusion Query Optimizer to user guide * Updated Query optimizer name, Added to index and replaced the README content * Fix RAT check --------- Co-authored-by: Andrew Lamb --- 
datafusion/optimizer/README.md | 318 +---------------- docs/source/index.rst | 2 +- .../library-user-guide/query-optimizer.md | 336 ++++++++++++++++++ 3 files changed, 339 insertions(+), 317 deletions(-) create mode 100644 docs/source/library-user-guide/query-optimizer.md diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index 5aacfaf59cb10..61bc1cd70145b 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -17,320 +17,6 @@ under the License. --> -# DataFusion Query Optimizer +Please see [Query Optimizer] in the Library User Guide -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory -format. - -DataFusion has modular design, allowing individual crates to be re-used in other projects. - -This crate is a submodule of DataFusion that provides a query optimizer for logical plans, and -contains an extensive set of OptimizerRules that may rewrite the plan and/or its expressions so -they execute more quickly while still computing the same result. - -## Running the Optimizer - -The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules -and applying it to a logical plan to produce an optimized logical plan. - -```rust - -// We need a logical plan as the starting point. There are many ways to build a logical plan: -// -// The `datafusion-expr` crate provides a LogicalPlanBuilder -// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL -// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan -let logical_plan = ... - -let mut config = OptimizerContext::default(); -let optimizer = Optimizer::new(&config); -let optimized_plan = optimizer.optimize(&logical_plan, &config, observe)?; - -fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) { - println!( - "After applying rule '{}':\n{}", - rule.name(), - plan.display_indent() - ) -} -``` - -## Providing Custom Rules - -The optimizer can be created with a custom set of rules. - -```rust -let optimizer = Optimizer::with_rules(vec![ - Arc::new(MyRule {}) -]); -``` - -## Writing Optimization Rules - -Please refer to the -[optimizer_rule.rs](../../datafusion-examples/examples/optimizer_rule.rs) -example to learn more about the general approach to writing optimizer rules and -then move onto studying the existing rules. - -All rules must implement the `OptimizerRule` trait. - -```rust -/// `OptimizerRule` transforms one ['LogicalPlan'] into another which -/// computes the same results, but in a potentially more efficient -/// way. If there are no suitable transformations for the input plan, -/// the optimizer can simply return it as is. -pub trait OptimizerRule { - /// Rewrite `plan` to an optimized form - fn optimize( - &self, - plan: &LogicalPlan, - config: &dyn OptimizerConfig, - ) -> Result; - - /// A human readable name for this optimizer rule - fn name(&self) -> &str; -} -``` - -### General Guidelines - -Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate -individual operators or expressions. - -Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs -the actual optimization. This approach is used in projection push down and filter push down. - -### Expression Naming - -Every expression in DataFusion has a name, which is used as the column name. 
For example, in this example the output -contains a single column with the name `"COUNT(aggregate_test_100.c9)"`: - -```text -> select count(c9) from aggregate_test_100; -+------------------------------+ -| COUNT(aggregate_test_100.c9) | -+------------------------------+ -| 100 | -+------------------------------+ -``` - -These names are used to refer to the columns in both subqueries as well as internally from one stage of the LogicalPlan -to another. For example: - -```text -> select "COUNT(aggregate_test_100.c9)" + 1 from (select count(c9) from aggregate_test_100) as sq; -+--------------------------------------------+ -| sq.COUNT(aggregate_test_100.c9) + Int64(1) | -+--------------------------------------------+ -| 101 | -+--------------------------------------------+ -``` - -### Implication - -Because DataFusion identifies columns using a string name, it means it is critical that the names of expressions are -not changed by the optimizer when it rewrites expressions. This is typically accomplished by renaming a rewritten -expression by adding an alias. - -Here is a simple example of such a rewrite. The expression `1 + 2` can be internally simplified to 3 but must still be -displayed the same as `1 + 2`: - -```text -> select 1 + 2; -+---------------------+ -| Int64(1) + Int64(2) | -+---------------------+ -| 3 | -+---------------------+ -``` - -Looking at the `EXPLAIN` output we can see that the optimizer has effectively rewritten `1 + 2` into effectively -`3 as "1 + 2"`: - -```text -> explain select 1 + 2; -+---------------+-------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------+ -| logical_plan | Projection: Int64(3) AS Int64(1) + Int64(2) | -| | EmptyRelation | -| physical_plan | ProjectionExec: expr=[3 as Int64(1) + Int64(2)] | -| | PlaceholderRowExec | -| | | -+---------------+-------------------------------------------------+ -``` - -If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/datafusion/issues/3704) -and [#3555](https://github.com/apache/datafusion/issues/3555) occur where the expected columns can not be found. - -### Building Expression Names - -There are currently two ways to create a name for an expression in the logical plan. - -```rust -impl Expr { - /// Returns the name of this expression as it should appear in a schema. This name - /// will not include any CAST expressions. - pub fn display_name(&self) -> Result { - create_name(self) - } - - /// Returns a full and complete string representation of this expression. - pub fn canonical_name(&self) -> String { - format!("{}", self) - } -} -``` - -When comparing expressions to determine if they are equivalent, `canonical_name` should be used, and when creating a -name to be used in a schema, `display_name` should be used. - -### Utilities - -There are a number of utility methods provided that take care of some common tasks. - -### ExprVisitor - -The `ExprVisitor` and `ExprVisitable` traits provide a mechanism for applying a visitor pattern to an expression tree. - -Here is an example that demonstrates this. 
- -```rust -fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec) -> Result<()> { - struct InSubqueryVisitor<'a> { - accum: &'a mut Vec, - } - - impl ExpressionVisitor for InSubqueryVisitor<'_> { - fn pre_visit(self, expr: &Expr) -> Result> { - if let Expr::InSubquery(_) = expr { - self.accum.push(expr.to_owned()); - } - Ok(Recursion::Continue(self)) - } - } - - expression.accept(InSubqueryVisitor { accum: extracted })?; - Ok(()) -} -``` - -### Rewriting Expressions - -The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied -to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait). - -The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression, -consuming `self` producing a new expression. - -```rust -let mut expr_rewriter = MyExprRewriter {}; -let expr = expr.rewrite(&mut expr_rewriter)?; -``` - -Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the -implementation does not need to perform any recursion since this is handled by the `rewrite` method. - -```rust -struct MyExprRewriter {} - -impl ExprRewriter for MyExprRewriter { - fn mutate(&mut self, expr: Expr) -> Result { - match expr { - Expr::Between { - negated, - expr, - low, - high, - } => { - let expr: Expr = expr.as_ref().clone(); - let low: Expr = low.as_ref().clone(); - let high: Expr = high.as_ref().clone(); - if negated { - Ok(expr.clone().lt(low).or(expr.clone().gt(high))) - } else { - Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high))) - } - } - _ => Ok(expr.clone()), - } - } -} -``` - -### optimize_children - -Typically a rule is applied recursively to all operators within a query plan. Rather than duplicate -that logic in each rule, an `optimize_children` method is provided. This recursively invokes the `optimize` method on -the plan's children and then returns a node of the same type. - -```rust -fn optimize( - &self, - plan: &LogicalPlan, - _config: &mut OptimizerConfig, -) -> Result { - // recurse down and optimize children first - let plan = utils::optimize_children(self, plan, _config)?; - - ... -} -``` - -### Writing Tests - -There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan -in isolation (without any other rule being applied). - -There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process. - -### Debugging - -The `EXPLAIN VERBOSE` command can be used to show the effect of each optimization rule on a query. - -In the following example, the `type_coercion` and `simplify_expressions` passes have simplified the plan so that it returns the constant `"3.2"` rather than doing a computation at execution time. 
- -```text -> explain verbose select cast(1 + 2.2 as string) as foo; -+------------------------------------------------------------+---------------------------------------------------------------------------+ -| plan_type | plan | -+------------------------------------------------------------+---------------------------------------------------------------------------+ -| initial_logical_plan | Projection: CAST(Int64(1) + Float64(2.2) AS Utf8) AS foo | -| | EmptyRelation | -| logical_plan after type_coercion | Projection: CAST(CAST(Int64(1) AS Float64) + Float64(2.2) AS Utf8) AS foo | -| | EmptyRelation | -| logical_plan after simplify_expressions | Projection: Utf8("3.2") AS foo | -| | EmptyRelation | -| logical_plan after unwrap_cast_in_comparison | SAME TEXT AS ABOVE | -| logical_plan after decorrelate_where_exists | SAME TEXT AS ABOVE | -| logical_plan after decorrelate_where_in | SAME TEXT AS ABOVE | -| logical_plan after scalar_subquery_to_join | SAME TEXT AS ABOVE | -| logical_plan after subquery_filter_to_join | SAME TEXT AS ABOVE | -| logical_plan after simplify_expressions | SAME TEXT AS ABOVE | -| logical_plan after eliminate_filter | SAME TEXT AS ABOVE | -| logical_plan after reduce_cross_join | SAME TEXT AS ABOVE | -| logical_plan after common_sub_expression_eliminate | SAME TEXT AS ABOVE | -| logical_plan after eliminate_limit | SAME TEXT AS ABOVE | -| logical_plan after projection_push_down | SAME TEXT AS ABOVE | -| logical_plan after rewrite_disjunctive_predicate | SAME TEXT AS ABOVE | -| logical_plan after reduce_outer_join | SAME TEXT AS ABOVE | -| logical_plan after filter_push_down | SAME TEXT AS ABOVE | -| logical_plan after limit_push_down | SAME TEXT AS ABOVE | -| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE | -| logical_plan | Projection: Utf8("3.2") AS foo | -| | EmptyRelation | -| initial_physical_plan | ProjectionExec: expr=[3.2 as foo] | -| | PlaceholderRowExec | -| | | -| physical_plan after aggregate_statistics | SAME TEXT AS ABOVE | -| physical_plan after join_selection | SAME TEXT AS ABOVE | -| physical_plan after coalesce_batches | SAME TEXT AS ABOVE | -| physical_plan after repartition | SAME TEXT AS ABOVE | -| physical_plan after add_merge_exec | SAME TEXT AS ABOVE | -| physical_plan | ProjectionExec: expr=[3.2 as foo] | -| | PlaceholderRowExec | -| | | -+------------------------------------------------------------+---------------------------------------------------------------------------+ -``` - -[df]: https://crates.io/crates/datafusion +[query optimizer]: https://datafusion.apache.org/library-user-guide/query-optimizer.html diff --git a/docs/source/index.rst b/docs/source/index.rst index ca6905c434f35..9c8c886d25020 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -107,7 +107,7 @@ To get started, see library-user-guide/custom-table-providers library-user-guide/extending-operators library-user-guide/profiling - + library-user-guide/query-optimizer .. _toc.contributor-guide: .. toctree:: diff --git a/docs/source/library-user-guide/query-optimizer.md b/docs/source/library-user-guide/query-optimizer.md new file mode 100644 index 0000000000000..5aacfaf59cb10 --- /dev/null +++ b/docs/source/library-user-guide/query-optimizer.md @@ -0,0 +1,336 @@ + + +# DataFusion Query Optimizer + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory +format. + +DataFusion has modular design, allowing individual crates to be re-used in other projects. 
+ +This crate is a submodule of DataFusion that provides a query optimizer for logical plans, and +contains an extensive set of OptimizerRules that may rewrite the plan and/or its expressions so +they execute more quickly while still computing the same result. + +## Running the Optimizer + +The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules +and applying it to a logical plan to produce an optimized logical plan. + +```rust + +// We need a logical plan as the starting point. There are many ways to build a logical plan: +// +// The `datafusion-expr` crate provides a LogicalPlanBuilder +// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL +// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan +let logical_plan = ... + +let mut config = OptimizerContext::default(); +let optimizer = Optimizer::new(&config); +let optimized_plan = optimizer.optimize(&logical_plan, &config, observe)?; + +fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) { + println!( + "After applying rule '{}':\n{}", + rule.name(), + plan.display_indent() + ) +} +``` + +## Providing Custom Rules + +The optimizer can be created with a custom set of rules. + +```rust +let optimizer = Optimizer::with_rules(vec![ + Arc::new(MyRule {}) +]); +``` + +## Writing Optimization Rules + +Please refer to the +[optimizer_rule.rs](../../datafusion-examples/examples/optimizer_rule.rs) +example to learn more about the general approach to writing optimizer rules and +then move onto studying the existing rules. + +All rules must implement the `OptimizerRule` trait. + +```rust +/// `OptimizerRule` transforms one ['LogicalPlan'] into another which +/// computes the same results, but in a potentially more efficient +/// way. If there are no suitable transformations for the input plan, +/// the optimizer can simply return it as is. +pub trait OptimizerRule { + /// Rewrite `plan` to an optimized form + fn optimize( + &self, + plan: &LogicalPlan, + config: &dyn OptimizerConfig, + ) -> Result; + + /// A human readable name for this optimizer rule + fn name(&self) -> &str; +} +``` + +### General Guidelines + +Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate +individual operators or expressions. + +Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs +the actual optimization. This approach is used in projection push down and filter push down. + +### Expression Naming + +Every expression in DataFusion has a name, which is used as the column name. For example, in this example the output +contains a single column with the name `"COUNT(aggregate_test_100.c9)"`: + +```text +> select count(c9) from aggregate_test_100; ++------------------------------+ +| COUNT(aggregate_test_100.c9) | ++------------------------------+ +| 100 | ++------------------------------+ +``` + +These names are used to refer to the columns in both subqueries as well as internally from one stage of the LogicalPlan +to another. 
For example: + +```text +> select "COUNT(aggregate_test_100.c9)" + 1 from (select count(c9) from aggregate_test_100) as sq; ++--------------------------------------------+ +| sq.COUNT(aggregate_test_100.c9) + Int64(1) | ++--------------------------------------------+ +| 101 | ++--------------------------------------------+ +``` + +### Implication + +Because DataFusion identifies columns using a string name, it means it is critical that the names of expressions are +not changed by the optimizer when it rewrites expressions. This is typically accomplished by renaming a rewritten +expression by adding an alias. + +Here is a simple example of such a rewrite. The expression `1 + 2` can be internally simplified to 3 but must still be +displayed the same as `1 + 2`: + +```text +> select 1 + 2; ++---------------------+ +| Int64(1) + Int64(2) | ++---------------------+ +| 3 | ++---------------------+ +``` + +Looking at the `EXPLAIN` output we can see that the optimizer has effectively rewritten `1 + 2` into effectively +`3 as "1 + 2"`: + +```text +> explain select 1 + 2; ++---------------+-------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------+ +| logical_plan | Projection: Int64(3) AS Int64(1) + Int64(2) | +| | EmptyRelation | +| physical_plan | ProjectionExec: expr=[3 as Int64(1) + Int64(2)] | +| | PlaceholderRowExec | +| | | ++---------------+-------------------------------------------------+ +``` + +If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/datafusion/issues/3704) +and [#3555](https://github.com/apache/datafusion/issues/3555) occur where the expected columns can not be found. + +### Building Expression Names + +There are currently two ways to create a name for an expression in the logical plan. + +```rust +impl Expr { + /// Returns the name of this expression as it should appear in a schema. This name + /// will not include any CAST expressions. + pub fn display_name(&self) -> Result { + create_name(self) + } + + /// Returns a full and complete string representation of this expression. + pub fn canonical_name(&self) -> String { + format!("{}", self) + } +} +``` + +When comparing expressions to determine if they are equivalent, `canonical_name` should be used, and when creating a +name to be used in a schema, `display_name` should be used. + +### Utilities + +There are a number of utility methods provided that take care of some common tasks. + +### ExprVisitor + +The `ExprVisitor` and `ExprVisitable` traits provide a mechanism for applying a visitor pattern to an expression tree. + +Here is an example that demonstrates this. + +```rust +fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec) -> Result<()> { + struct InSubqueryVisitor<'a> { + accum: &'a mut Vec, + } + + impl ExpressionVisitor for InSubqueryVisitor<'_> { + fn pre_visit(self, expr: &Expr) -> Result> { + if let Expr::InSubquery(_) = expr { + self.accum.push(expr.to_owned()); + } + Ok(Recursion::Continue(self)) + } + } + + expression.accept(InSubqueryVisitor { accum: extracted })?; + Ok(()) +} +``` + +### Rewriting Expressions + +The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied +to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait). 
+ +The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression, +consuming `self` producing a new expression. + +```rust +let mut expr_rewriter = MyExprRewriter {}; +let expr = expr.rewrite(&mut expr_rewriter)?; +``` + +Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the +implementation does not need to perform any recursion since this is handled by the `rewrite` method. + +```rust +struct MyExprRewriter {} + +impl ExprRewriter for MyExprRewriter { + fn mutate(&mut self, expr: Expr) -> Result { + match expr { + Expr::Between { + negated, + expr, + low, + high, + } => { + let expr: Expr = expr.as_ref().clone(); + let low: Expr = low.as_ref().clone(); + let high: Expr = high.as_ref().clone(); + if negated { + Ok(expr.clone().lt(low).or(expr.clone().gt(high))) + } else { + Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high))) + } + } + _ => Ok(expr.clone()), + } + } +} +``` + +### optimize_children + +Typically a rule is applied recursively to all operators within a query plan. Rather than duplicate +that logic in each rule, an `optimize_children` method is provided. This recursively invokes the `optimize` method on +the plan's children and then returns a node of the same type. + +```rust +fn optimize( + &self, + plan: &LogicalPlan, + _config: &mut OptimizerConfig, +) -> Result { + // recurse down and optimize children first + let plan = utils::optimize_children(self, plan, _config)?; + + ... +} +``` + +### Writing Tests + +There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan +in isolation (without any other rule being applied). + +There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process. + +### Debugging + +The `EXPLAIN VERBOSE` command can be used to show the effect of each optimization rule on a query. + +In the following example, the `type_coercion` and `simplify_expressions` passes have simplified the plan so that it returns the constant `"3.2"` rather than doing a computation at execution time. 
+ +```text +> explain verbose select cast(1 + 2.2 as string) as foo; ++------------------------------------------------------------+---------------------------------------------------------------------------+ +| plan_type | plan | ++------------------------------------------------------------+---------------------------------------------------------------------------+ +| initial_logical_plan | Projection: CAST(Int64(1) + Float64(2.2) AS Utf8) AS foo | +| | EmptyRelation | +| logical_plan after type_coercion | Projection: CAST(CAST(Int64(1) AS Float64) + Float64(2.2) AS Utf8) AS foo | +| | EmptyRelation | +| logical_plan after simplify_expressions | Projection: Utf8("3.2") AS foo | +| | EmptyRelation | +| logical_plan after unwrap_cast_in_comparison | SAME TEXT AS ABOVE | +| logical_plan after decorrelate_where_exists | SAME TEXT AS ABOVE | +| logical_plan after decorrelate_where_in | SAME TEXT AS ABOVE | +| logical_plan after scalar_subquery_to_join | SAME TEXT AS ABOVE | +| logical_plan after subquery_filter_to_join | SAME TEXT AS ABOVE | +| logical_plan after simplify_expressions | SAME TEXT AS ABOVE | +| logical_plan after eliminate_filter | SAME TEXT AS ABOVE | +| logical_plan after reduce_cross_join | SAME TEXT AS ABOVE | +| logical_plan after common_sub_expression_eliminate | SAME TEXT AS ABOVE | +| logical_plan after eliminate_limit | SAME TEXT AS ABOVE | +| logical_plan after projection_push_down | SAME TEXT AS ABOVE | +| logical_plan after rewrite_disjunctive_predicate | SAME TEXT AS ABOVE | +| logical_plan after reduce_outer_join | SAME TEXT AS ABOVE | +| logical_plan after filter_push_down | SAME TEXT AS ABOVE | +| logical_plan after limit_push_down | SAME TEXT AS ABOVE | +| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE | +| logical_plan | Projection: Utf8("3.2") AS foo | +| | EmptyRelation | +| initial_physical_plan | ProjectionExec: expr=[3.2 as foo] | +| | PlaceholderRowExec | +| | | +| physical_plan after aggregate_statistics | SAME TEXT AS ABOVE | +| physical_plan after join_selection | SAME TEXT AS ABOVE | +| physical_plan after coalesce_batches | SAME TEXT AS ABOVE | +| physical_plan after repartition | SAME TEXT AS ABOVE | +| physical_plan after add_merge_exec | SAME TEXT AS ABOVE | +| physical_plan | ProjectionExec: expr=[3.2 as foo] | +| | PlaceholderRowExec | +| | | ++------------------------------------------------------------+---------------------------------------------------------------------------+ +``` + +[df]: https://crates.io/crates/datafusion From 47d5d1fe1ac7d2eb363d4b2b52268629e89b64f9 Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Mon, 22 Jul 2024 06:32:12 -0600 Subject: [PATCH 107/357] feat: Error when a SHOW command is passed in with an accompanying non-existant variable (#11540) * feat: Error when a SHOW command is passed in with an accompanying non-existant variable * fix: Run fmt * Switch to 'query error' instead of 'statement error' in sqllogictest test to see if that fixes CI * Move some errors in sqllogictest to line above to maybe fix CI * Fix (hopefully final) failing information_schema slt test due to multiline error message/placement --- datafusion/sql/src/statement.rs | 16 ++++++++++++++++ .../test_files/information_schema.slt | 11 ++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 6df25086305d2..8eb4113f80a6c 100644 --- a/datafusion/sql/src/statement.rs +++ 
b/datafusion/sql/src/statement.rs @@ -1146,6 +1146,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // we could introduce alias in OptionDefinition if this string matching thing grows format!("{base_query} WHERE name = 'datafusion.execution.time_zone'") } else { + // These values are what are used to make the information_schema table, so we just + // check here, before actually planning or executing the query, if it would produce no + // results, and error preemptively if it would (for a better UX) + let is_valid_variable = self + .context_provider + .options() + .entries() + .iter() + .any(|opt| opt.key == variable); + + if !is_valid_variable { + return plan_err!( + "'{variable}' is not a variable which can be viewed with 'SHOW'" + ); + } + format!("{base_query} WHERE name = '{variable}'") }; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index c8c0d1d45b974..1c6ffd44b1efc 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -370,9 +370,12 @@ datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. # show empty verbose -query TTT +statement error DataFusion error: Error during planning: '' is not a variable which can be viewed with 'SHOW' SHOW VERBOSE ----- + +# show nonsense verbose +statement error DataFusion error: Error during planning: 'nonsense' is not a variable which can be viewed with 'SHOW' +SHOW NONSENSE VERBOSE # information_schema_describe_table @@ -508,9 +511,7 @@ SHOW columns from datafusion.public.t2 # show_non_existing_variable -# FIXME -# currently we cannot know whether a variable exists, this will output 0 row instead -statement ok +statement error DataFusion error: Error during planning: 'something_unknown' is not a variable which can be viewed with 'SHOW' SHOW SOMETHING_UNKNOWN; statement ok From 5c65efc79954ce495328d63fd0445e982a7319a9 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 22 Jul 2024 20:35:42 +0800 Subject: [PATCH 108/357] fix: CASE with NULL (#11542) * fix: CASE with NULL * chore: Add tests * chore * chore: Fix CI * chore: Support all types are NULL * chore: Fix CI * chore: add more tests * fix: Return first non-null type in then exprs * chore: Fix CI * Update datafusion/expr/src/expr_schema.rs Co-authored-by: Jonah Gao * Update datafusion/expr/src/expr_schema.rs Co-authored-by: Jonah Gao --------- Co-authored-by: Jonah Gao --- datafusion/expr/src/expr_schema.rs | 12 +++++++- .../sqllogictest/test_files/aggregate.slt | 28 +++++++++++++++++++ datafusion/sqllogictest/test_files/scalar.slt | 8 +++--- datafusion/sqllogictest/test_files/select.slt | 27 ++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 1df5d6c4d7363..5e0571f712ee5 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -112,7 +112,17 @@ impl ExprSchemable for Expr { Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l) => Ok(l.data_type()), - Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), + Expr::Case(case) => { + for (_, then_expr) in &case.when_then_expr { + let then_type = then_expr.get_type(schema)?; + if !then_type.is_null() { + return Ok(then_type); + } + } + case.else_expr + .as_ref() + .map_or(Ok(DataType::Null), |e| e.get_type(schema)) + } Expr::Cast(Cast { 
data_type, .. }) | Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()), Expr::Unnest(Unnest { expr }) => { diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index d0f7f2d9ac7aa..bb5ce1150a58b 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5418,6 +5418,34 @@ SELECT LAST_VALUE(column1 ORDER BY column2 DESC) IGNORE NULLS FROM t; statement ok DROP TABLE t; +# Test for CASE with NULL in aggregate function +statement ok +CREATE TABLE example(data double precision); + +statement ok +INSERT INTO example VALUES (1), (2), (NULL), (4); + +query RR +SELECT + sum(CASE WHEN data is NULL THEN NULL ELSE data+1 END) as then_null, + sum(CASE WHEN data is NULL THEN data+1 ELSE NULL END) as else_null +FROM example; +---- +10 NULL + +query R +SELECT + CASE data WHEN 1 THEN NULL WHEN 2 THEN 3.3 ELSE NULL END as case_null +FROM example; +---- +NULL +3.3 +NULL +NULL + +statement ok +drop table example; + # Test Convert FirstLast optimizer rule statement ok CREATE EXTERNAL TABLE convert_first_last_table ( diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 48f94fc080a4f..ff9afa94f40af 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1238,27 +1238,27 @@ SELECT CASE WHEN NULL THEN 'foo' ELSE 'bar' END bar # case_expr_with_null() -query ? +query I select case when b is null then null else b end from (select a,b from (values (1,null),(2,3)) as t (a,b)) a; ---- NULL 3 -query ? +query I select case when b is null then null else b end from (select a,b from (values (1,1),(2,3)) as t (a,b)) a; ---- 1 3 # case_expr_with_nulls() -query ? +query I select case when b is null then null when b < 3 then null when b >=3 then b + 1 else b end from (select a,b from (values (1,null),(1,2),(2,3)) as t (a,b)) a ---- NULL NULL 4 -query ? 
+query I select case b when 1 then null when 2 then null when 3 then b + 1 else b end from (select a,b from (values (1,null),(1,2),(2,3)) as t (a,b)) a; ---- NULL diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 03426dec874f3..6884efc07e159 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -613,6 +613,33 @@ END; ---- 2 +# select case when type is null +query I +select CASE + WHEN NULL THEN 1 + ELSE 2 +END; +---- +2 + +# select case then type is null +query I +select CASE + WHEN 10 > 5 THEN NULL + ELSE 2 +END; +---- +NULL + +# select case else type is null +query I +select CASE + WHEN 10 = 5 THEN 1 + ELSE NULL +END; +---- +NULL + # Binary Expression for LargeUtf8 # issue: https://github.com/apache/datafusion/issues/5893 statement ok From 51da92fb9fe1b1bc2344fa78be52c448b36880d9 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Mon, 22 Jul 2024 20:36:58 +0800 Subject: [PATCH 109/357] Provide DataFrame API for `map` and move `map` to `functions-array` (#11560) * move map to `functions-array` and implement dataframe api * add benchmark for dataframe api * fix format * add roundtrip_expr_api test --- datafusion/core/Cargo.toml | 5 + datafusion/core/benches/map_query_sql.rs | 93 +++++++++++++++++++ .../tests/dataframe/dataframe_functions.rs | 22 +++++ datafusion/functions-array/benches/map.rs | 37 +++++++- datafusion/functions-array/src/lib.rs | 3 + .../src/core => functions-array/src}/map.rs | 35 ++++--- datafusion/functions-array/src/planner.rs | 6 +- datafusion/functions/Cargo.toml | 5 - datafusion/functions/benches/map.rs | 80 ---------------- datafusion/functions/src/core/mod.rs | 7 -- .../tests/cases/roundtrip_logical_plan.rs | 5 + 11 files changed, 189 insertions(+), 109 deletions(-) create mode 100644 datafusion/core/benches/map_query_sql.rs rename datafusion/{functions/src/core => functions-array/src}/map.rs (83%) delete mode 100644 datafusion/functions/benches/map.rs diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index c937a6f6e59a9..4301396b231fe 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -217,3 +217,8 @@ name = "topk_aggregate" [[bench]] harness = false name = "parquet_statistic" + +[[bench]] +harness = false +name = "map_query_sql" +required-features = ["array_expressions"] diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs new file mode 100644 index 0000000000000..b6ac8b6b647a1 --- /dev/null +++ b/datafusion/core/benches/map_query_sql.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow_array::{ArrayRef, Int32Array, RecordBatch}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use parking_lot::Mutex; +use rand::prelude::ThreadRng; +use rand::Rng; +use tokio::runtime::Runtime; + +use datafusion::prelude::SessionContext; +use datafusion_common::ScalarValue; +use datafusion_expr::Expr; +use datafusion_functions_array::map::map; + +mod data_utils; + +fn build_keys(rng: &mut ThreadRng) -> Vec { + let mut keys = vec![]; + for _ in 0..1000 { + keys.push(rng.gen_range(0..9999).to_string()); + } + keys +} + +fn build_values(rng: &mut ThreadRng) -> Vec { + let mut values = vec![]; + for _ in 0..1000 { + values.push(rng.gen_range(0..9999)); + } + values +} + +fn t_batch(num: i32) -> RecordBatch { + let value: Vec = (0..num).collect(); + let c1: ArrayRef = Arc::new(Int32Array::from(value)); + RecordBatch::try_from_iter(vec![("c1", c1)]).unwrap() +} + +fn create_context(num: i32) -> datafusion_common::Result>> { + let ctx = SessionContext::new(); + ctx.register_batch("t", t_batch(num))?; + Ok(Arc::new(Mutex::new(ctx))) +} + +fn criterion_benchmark(c: &mut Criterion) { + let ctx = create_context(1).unwrap(); + let rt = Runtime::new().unwrap(); + let df = rt.block_on(ctx.lock().table("t")).unwrap(); + + let mut rng = rand::thread_rng(); + let keys = build_keys(&mut rng); + let values = build_values(&mut rng); + let mut key_buffer = Vec::new(); + let mut value_buffer = Vec::new(); + + for i in 0..1000 { + key_buffer.push(Expr::Literal(ScalarValue::Utf8(Some(keys[i].clone())))); + value_buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i])))); + } + c.bench_function("map_1000_1", |b| { + b.iter(|| { + black_box( + rt.block_on( + df.clone() + .select(vec![map(key_buffer.clone(), value_buffer.clone())]) + .unwrap() + .collect(), + ) + .unwrap(), + ); + }); + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 1c55c48fea40d..f7b02196d8ed5 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -34,6 +34,7 @@ use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::expr::Alias; use datafusion_expr::ExprSchemable; use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont}; +use datafusion_functions_array::map::map; fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ @@ -1087,3 +1088,24 @@ async fn test_fn_array_to_string() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_fn_map() -> Result<()> { + let expr = map( + vec![lit("a"), lit("b"), lit("c")], + vec![lit(1), lit(2), lit(3)], + ); + let expected = [ + "+---------------------------------------------------------------------------------------+", + "| map(make_array(Utf8(\"a\"),Utf8(\"b\"),Utf8(\"c\")),make_array(Int32(1),Int32(2),Int32(3))) |", + "+---------------------------------------------------------------------------------------+", + "| {a: 1, b: 2, c: 3} |", + "| {a: 1, b: 2, c: 3} |", + "| {a: 1, b: 2, c: 3} |", + "| {a: 1, b: 2, c: 3} |", + "+---------------------------------------------------------------------------------------+", + ]; + assert_fn_batches!(expr, expected); + + Ok(()) +} diff --git a/datafusion/functions-array/benches/map.rs b/datafusion/functions-array/benches/map.rs index 2e9b45266abc6..c2e0e641e80d2 100644 --- a/datafusion/functions-array/benches/map.rs 
+++ b/datafusion/functions-array/benches/map.rs @@ -17,13 +17,18 @@ extern crate criterion; +use arrow_array::{Int32Array, ListArray, StringArray}; +use arrow_buffer::{OffsetBuffer, ScalarBuffer}; +use arrow_schema::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::prelude::ThreadRng; use rand::Rng; +use std::sync::Arc; use datafusion_common::ScalarValue; use datafusion_expr::planner::ExprPlanner; -use datafusion_expr::Expr; +use datafusion_expr::{ColumnarValue, Expr}; +use datafusion_functions_array::map::map_udf; use datafusion_functions_array::planner::ArrayFunctionPlanner; fn keys(rng: &mut ThreadRng) -> Vec { @@ -63,6 +68,36 @@ fn criterion_benchmark(c: &mut Criterion) { ); }); }); + + c.bench_function("map_1000", |b| { + let mut rng = rand::thread_rng(); + let field = Arc::new(Field::new("item", DataType::Utf8, true)); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); + let key_list = ListArray::new( + field, + offsets, + Arc::new(StringArray::from(keys(&mut rng))), + None, + ); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); + let value_list = ListArray::new( + field, + offsets, + Arc::new(Int32Array::from(values(&mut rng))), + None, + ); + let keys = ColumnarValue::Scalar(ScalarValue::List(Arc::new(key_list))); + let values = ColumnarValue::Scalar(ScalarValue::List(Arc::new(value_list))); + + b.iter(|| { + black_box( + map_udf() + .invoke(&[keys.clone(), values.clone()]) + .expect("map should work on valid values"), + ); + }); + }); } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 9717d29883fd5..f68f59dcd6a12 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -41,6 +41,7 @@ pub mod extract; pub mod flatten; pub mod length; pub mod make_array; +pub mod map; pub mod planner; pub mod position; pub mod range; @@ -53,6 +54,7 @@ pub mod set_ops; pub mod sort; pub mod string; pub mod utils; + use datafusion_common::Result; use datafusion_execution::FunctionRegistry; use datafusion_expr::ScalarUDF; @@ -140,6 +142,7 @@ pub fn all_default_array_functions() -> Vec> { replace::array_replace_n_udf(), replace::array_replace_all_udf(), replace::array_replace_udf(), + map::map_udf(), ] } diff --git a/datafusion/functions/src/core/map.rs b/datafusion/functions-array/src/map.rs similarity index 83% rename from datafusion/functions/src/core/map.rs rename to datafusion/functions-array/src/map.rs index 2deef242f8a02..e218b501dcf16 100644 --- a/datafusion/functions/src/core/map.rs +++ b/datafusion/functions-array/src/map.rs @@ -15,17 +15,26 @@ // specific language governing permissions and limitations // under the License. 
+use crate::make_array::make_array; +use arrow::array::ArrayData; +use arrow_array::{Array, ArrayRef, MapArray, StructArray}; +use arrow_buffer::{Buffer, ToByteSlice}; +use arrow_schema::{DataType, Field, SchemaBuilder}; +use datafusion_common::{exec_err, ScalarValue}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::collections::VecDeque; use std::sync::Arc; -use arrow::array::{Array, ArrayData, ArrayRef, MapArray, StructArray}; -use arrow::datatypes::{DataType, Field, SchemaBuilder}; -use arrow_buffer::{Buffer, ToByteSlice}; +/// Returns a map created from a key list and a value list +pub fn map(keys: Vec, values: Vec) -> Expr { + let keys = make_array(keys); + let values = make_array(values); + Expr::ScalarFunction(ScalarFunction::new_udf(map_udf(), vec![keys, values])) +} -use datafusion_common::Result; -use datafusion_common::{exec_err, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +create_func!(MapFunc, map_udf); /// Check if we can evaluate the expr to constant directly. /// @@ -39,7 +48,7 @@ fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool { .all(|arg| matches!(arg, ColumnarValue::Scalar(_))) } -fn make_map_batch(args: &[ColumnarValue]) -> Result { +fn make_map_batch(args: &[ColumnarValue]) -> datafusion_common::Result { if args.len() != 2 { return exec_err!( "make_map requires exactly 2 arguments, got {} instead", @@ -54,7 +63,9 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { make_map_batch_internal(key, value, can_evaluate_to_const) } -fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { +fn get_first_array_ref( + columnar_value: &ColumnarValue, +) -> datafusion_common::Result { match columnar_value { ColumnarValue::Scalar(value) => match value { ScalarValue::List(array) => Ok(array.value(0)), @@ -70,7 +81,7 @@ fn make_map_batch_internal( keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool, -) -> Result { +) -> datafusion_common::Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); } @@ -150,7 +161,7 @@ impl ScalarUDFImpl for MapFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { if arg_types.len() % 2 != 0 { return exec_err!( "map requires an even number of arguments, got {} instead", @@ -175,12 +186,12 @@ impl ScalarUDFImpl for MapFunc { )) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { make_map_batch(args) } } -fn get_element_type(data_type: &DataType) -> Result<&DataType> { +fn get_element_type(data_type: &DataType) -> datafusion_common::Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), DataType::LargeList(element) => Ok(element.data_type()), diff --git a/datafusion/functions-array/src/planner.rs b/datafusion/functions-array/src/planner.rs index fbb541d9b151e..c63c2c83e66e8 100644 --- a/datafusion/functions-array/src/planner.rs +++ b/datafusion/functions-array/src/planner.rs @@ -27,6 +27,7 @@ use datafusion_expr::{ use datafusion_functions::expr_fn::get_field; use datafusion_functions_aggregate::nth_value::nth_value_udaf; +use crate::map::map_udf; use crate::{ array_has::array_has_all, expr_fn::{array_append, array_concat, array_prepend}, @@ -111,10 +112,7 @@ impl ExprPlanner for ArrayFunctionPlanner { let values = 
make_array(values.into_iter().map(|(_, e)| e).collect()); Ok(PlannerResult::Planned(Expr::ScalarFunction( - ScalarFunction::new_udf( - datafusion_functions::core::map(), - vec![keys, values], - ), + ScalarFunction::new_udf(map_udf(), vec![keys, values]), ))) } } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index b143080b19626..0281676cabf2d 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -141,8 +141,3 @@ required-features = ["string_expressions"] harness = false name = "upper" required-features = ["string_expressions"] - -[[bench]] -harness = false -name = "map" -required-features = ["core_expressions"] diff --git a/datafusion/functions/benches/map.rs b/datafusion/functions/benches/map.rs deleted file mode 100644 index 811c21a41b46d..0000000000000 --- a/datafusion/functions/benches/map.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -extern crate criterion; - -use arrow::array::{Int32Array, ListArray, StringArray}; -use arrow::datatypes::{DataType, Field}; -use arrow_buffer::{OffsetBuffer, ScalarBuffer}; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_common::ScalarValue; -use datafusion_expr::ColumnarValue; -use datafusion_functions::core::map; -use rand::prelude::ThreadRng; -use rand::Rng; -use std::sync::Arc; - -fn keys(rng: &mut ThreadRng) -> Vec { - let mut keys = vec![]; - for _ in 0..1000 { - keys.push(rng.gen_range(0..9999).to_string()); - } - keys -} - -fn values(rng: &mut ThreadRng) -> Vec { - let mut values = vec![]; - for _ in 0..1000 { - values.push(rng.gen_range(0..9999)); - } - values -} - -fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("map_1000", |b| { - let mut rng = rand::thread_rng(); - let field = Arc::new(Field::new("item", DataType::Utf8, true)); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); - let key_list = ListArray::new( - field, - offsets, - Arc::new(StringArray::from(keys(&mut rng))), - None, - ); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); - let value_list = ListArray::new( - field, - offsets, - Arc::new(Int32Array::from(values(&mut rng))), - None, - ); - let keys = ColumnarValue::Scalar(ScalarValue::List(Arc::new(key_list))); - let values = ColumnarValue::Scalar(ScalarValue::List(Arc::new(value_list))); - - b.iter(|| { - black_box( - map() - .invoke(&[keys.clone(), values.clone()]) - .expect("map should work on valid values"), - ); - }); - }); -} - -criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 
ee0309e593820..8c51213972843 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -25,7 +25,6 @@ pub mod arrowtypeof; pub mod coalesce; pub mod expr_ext; pub mod getfield; -pub mod map; pub mod named_struct; pub mod nullif; pub mod nvl; @@ -43,7 +42,6 @@ make_udf_function!(r#struct::StructFunc, STRUCT, r#struct); make_udf_function!(named_struct::NamedStructFunc, NAMED_STRUCT, named_struct); make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field); make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce); -make_udf_function!(map::MapFunc, MAP, map); pub mod expr_fn { use datafusion_expr::{Expr, Literal}; @@ -80,10 +78,6 @@ pub mod expr_fn { coalesce, "Returns `coalesce(args...)`, which evaluates to the value of the first expr which is not NULL", args, - ),( - map, - "Returns a map created from a key list and a value list", - args, )); #[doc = "Returns the value of the field with the given name from the struct"] @@ -101,6 +95,5 @@ pub fn functions() -> Vec> { arrow_typeof(), named_struct(), coalesce(), - map(), ] } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 11945f39589a7..3476d5d042cc8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -44,6 +44,7 @@ use datafusion::functions_aggregate::expr_fn::{ count_distinct, covar_pop, covar_samp, first_value, grouping, median, stddev, stddev_pop, sum, var_pop, var_sample, }; +use datafusion::functions_array::map::map; use datafusion::prelude::*; use datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::config::TableOptions; @@ -704,6 +705,10 @@ async fn roundtrip_expr_api() -> Result<()> { bool_or(lit(true)), array_agg(lit(1)), array_agg(lit(1)).distinct().build().unwrap(), + map( + vec![lit(1), lit(2), lit(3)], + vec![lit(10), lit(20), lit(30)], + ), ]; // ensure expressions created with the expr api can be round tripped From 81d06f2e103385fe744fb909563d4fb4c4b13d49 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Mon, 22 Jul 2024 05:37:55 -0700 Subject: [PATCH 110/357] Move OutputRequirements to datafusion-physical-optimizer crate (#11579) * Move OutputRequirements to datafusion-physical-optimizer crate * Fix fmt * Fix cargo for cli --- datafusion-cli/Cargo.lock | 10 ++++---- .../enforce_distribution.rs | 4 ++-- datafusion/core/src/physical_optimizer/mod.rs | 1 - datafusion/physical-optimizer/Cargo.toml | 2 ++ datafusion/physical-optimizer/src/lib.rs | 1 + .../src}/output_requirements.rs | 24 +++++++++++-------- 6 files changed, 25 insertions(+), 17 deletions(-) rename datafusion/{core/src/physical_optimizer => physical-optimizer/src}/output_requirements.rs (94%) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 61d9c72b89d99..84bff8c87190a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -118,9 +118,9 @@ dependencies = [ [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -875,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.5" +version = "1.1.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" dependencies = [ "jobserver", "libc", @@ -1397,6 +1397,8 @@ name = "datafusion-physical-optimizer" version = "40.0.0" dependencies = [ "datafusion-common", + "datafusion-execution", + "datafusion-physical-expr", "datafusion-physical-plan", ] diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 9791f23f963e0..62ac9089e2b4d 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -24,7 +24,6 @@ use std::fmt::Debug; use std::sync::Arc; -use super::output_requirements::OutputRequirementExec; use crate::config::ConfigOptions; use crate::error::Result; use crate::physical_optimizer::utils::{ @@ -55,6 +54,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; @@ -1290,7 +1290,6 @@ pub(crate) mod tests { use crate::datasource::object_store::ObjectStoreUrl; use crate::datasource::physical_plan::{CsvExec, FileScanConfig, ParquetExec}; use crate::physical_optimizer::enforce_sorting::EnforceSorting; - use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_optimizer::test_utils::{ check_integrity, coalesce_partitions_exec, repartition_exec, }; @@ -1301,6 +1300,7 @@ pub(crate) mod tests { use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::{displayable, DisplayAs, DisplayFormatType, Statistics}; + use datafusion_physical_optimizer::output_requirements::OutputRequirements; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::ScalarValue; diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 582f340151ae5..a0c9c36977442 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -29,7 +29,6 @@ pub mod enforce_sorting; pub mod join_selection; pub mod limited_distinct_aggregation; pub mod optimizer; -pub mod output_requirements; pub mod projection_pushdown; pub mod pruning; pub mod replace_with_order_preserving_variants; diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 9c0ee61da52a0..125ea6acc77fd 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -33,4 +33,6 @@ workspace = true [dependencies] datafusion-common = { workspace = true, default-features = true } +datafusion-execution = { workspace = true } +datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index c5a49216f5fdc..6b9df7cad5c8b 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -18,5 +18,6 @@ #![deny(clippy::clone_on_ref_ptr)] mod optimizer; +pub mod output_requirements; pub use 
optimizer::PhysicalOptimizerRule; diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs similarity index 94% rename from datafusion/core/src/physical_optimizer/output_requirements.rs rename to datafusion/physical-optimizer/src/output_requirements.rs index cb9a0cb90e6c7..f971d8f1f0aaf 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -24,17 +24,21 @@ use std::sync::Arc; -use crate::physical_plan::sorts::sort::SortExec; -use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; +use datafusion_execution::TaskContext; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, +}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr::{Distribution, LexRequirement, PhysicalSortRequirement}; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::{ExecutionPlanProperties, PlanProperties}; +use crate::PhysicalOptimizerRule; + /// This rule either adds or removes [`OutputRequirements`]s to/from the physical /// plan according to its `mode` attribute, which is set by the constructors /// `new_add_mode` and `new_remove_mode`. With this rule, we can keep track of @@ -86,7 +90,7 @@ enum RuleMode { /// /// See [`OutputRequirements`] for more details #[derive(Debug)] -pub(crate) struct OutputRequirementExec { +pub struct OutputRequirementExec { input: Arc, order_requirement: Option, dist_requirement: Distribution, @@ -94,7 +98,7 @@ pub(crate) struct OutputRequirementExec { } impl OutputRequirementExec { - pub(crate) fn new( + pub fn new( input: Arc, requirements: Option, dist_requirement: Distribution, @@ -108,8 +112,8 @@ impl OutputRequirementExec { } } - pub(crate) fn input(&self) -> Arc { - self.input.clone() + pub fn input(&self) -> Arc { + Arc::clone(&self.input) } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. @@ -179,8 +183,8 @@ impl ExecutionPlan for OutputRequirementExec { fn execute( &self, _partition: usize, - _context: Arc, - ) -> Result { + _context: Arc, + ) -> Result { unreachable!(); } @@ -275,7 +279,7 @@ fn require_top_ordering_helper( // When an operator requires an ordering, any `SortExec` below can not // be responsible for (i.e. the originator of) the global ordering. let (new_child, is_changed) = - require_top_ordering_helper(children.swap_remove(0).clone())?; + require_top_ordering_helper(Arc::clone(children.swap_remove(0)))?; Ok((plan.with_new_children(vec![new_child])?, is_changed)) } else { // Stop searching, there is no global ordering desired for the query. 
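// A minimal usage sketch for the rule after this move: `OutputRequirements` is now
// imported from the standalone `datafusion-physical-optimizer` crate rather than
// from `datafusion::physical_optimizer`. The zero-argument `new_add_mode` /
// `new_remove_mode` constructors and the exact `PhysicalOptimizerRule::optimize`
// signature are assumed from the doc comments in the diff above; they are not
// spelled out by this patch itself.
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_physical_optimizer::output_requirements::OutputRequirements;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::ExecutionPlan;

// Wrap a plan in an `OutputRequirementExec` before the ordering-sensitive rules run,
// then strip it again afterwards, mirroring how the "add" and "remove" modes of this
// rule are intended to be paired around the rest of the optimizer.
fn track_output_requirements(
    plan: Arc<dyn ExecutionPlan>,
    config: &ConfigOptions,
) -> Result<Arc<dyn ExecutionPlan>> {
    let with_requirements = OutputRequirements::new_add_mode().optimize(plan, config)?;
    OutputRequirements::new_remove_mode().optimize(with_requirements, config)
}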
From 4417a9404f99eeb662d887cbb12de3445eb9cd2a Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Mon, 22 Jul 2024 22:23:26 +0800 Subject: [PATCH 111/357] Minor: move `Column` related tests and rename `column.rs` (#11573) * Minor: move `Column` related tests * Rename column.rs to unknown_column.rs --- .../src/expressions/column.rs | 46 ++++++++++++++++++ .../physical-expr/src/expressions/mod.rs | 4 +- .../{column.rs => unknown_column.rs} | 48 +------------------ 3 files changed, 49 insertions(+), 49 deletions(-) rename datafusion/physical-expr/src/expressions/{column.rs => unknown_column.rs} (56%) diff --git a/datafusion/physical-expr-common/src/expressions/column.rs b/datafusion/physical-expr-common/src/expressions/column.rs index d972d35b9e4e7..5397599ea2dcc 100644 --- a/datafusion/physical-expr-common/src/expressions/column.rs +++ b/datafusion/physical-expr-common/src/expressions/column.rs @@ -135,3 +135,49 @@ impl Column { pub fn col(name: &str, schema: &Schema) -> Result> { Ok(Arc::new(Column::new_with_schema(name, schema)?)) } + +#[cfg(test)] +mod test { + use super::Column; + use crate::physical_expr::PhysicalExpr; + + use arrow::array::StringArray; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow::record_batch::RecordBatch; + use datafusion_common::Result; + + use std::sync::Arc; + + #[test] + fn out_of_bounds_data_type() { + let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); + let col = Column::new("id", 9); + let error = col.data_type(&schema).expect_err("error").strip_backtrace(); + assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ + but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ + DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) + } + + #[test] + fn out_of_bounds_nullable() { + let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); + let col = Column::new("id", 9); + let error = col.nullable(&schema).expect_err("error").strip_backtrace(); + assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ + but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ + DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) + } + + #[test] + fn out_of_bounds_evaluate() -> Result<()> { + let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); + let data: StringArray = vec!["data"].into(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data)])?; + let col = Column::new("id", 9); + let error = col.evaluate(&batch).expect_err("error").strip_backtrace(); + assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ + but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ + DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)); + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index fa80bc9873f04..5a2bcb63b18e6 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -20,7 +20,6 @@ #[macro_use] mod binary; mod case; -mod column; mod in_list; mod is_not_null; mod is_null; @@ -29,6 +28,7 @@ mod negative; mod no_op; mod not; mod try_cast; +mod unknown_column; /// Module with some convenient methods 
used in expression building pub mod helpers { @@ -48,7 +48,6 @@ pub use crate::PhysicalSortExpr; pub use binary::{binary, BinaryExpr}; pub use case::{case, CaseExpr}; -pub use column::UnKnownColumn; pub use datafusion_expr::utils::format_state_name; pub use datafusion_physical_expr_common::expressions::column::{col, Column}; pub use datafusion_physical_expr_common::expressions::literal::{lit, Literal}; @@ -61,3 +60,4 @@ pub use negative::{negative, NegativeExpr}; pub use no_op::NoOp; pub use not::{not, NotExpr}; pub use try_cast::{try_cast, TryCastExpr}; +pub use unknown_column::UnKnownColumn; diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/unknown_column.rs similarity index 56% rename from datafusion/physical-expr/src/expressions/column.rs rename to datafusion/physical-expr/src/expressions/unknown_column.rs index ab43201ceb75b..cb7221e7fa151 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/unknown_column.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Column expression +//! UnKnownColumn expression use std::any::Any; use std::hash::{Hash, Hasher}; @@ -100,49 +100,3 @@ impl PartialEq for UnKnownColumn { false } } - -#[cfg(test)] -mod test { - use crate::expressions::Column; - use crate::PhysicalExpr; - - use arrow::array::StringArray; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; - use datafusion_common::Result; - - use std::sync::Arc; - - #[test] - fn out_of_bounds_data_type() { - let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); - let col = Column::new("id", 9); - let error = col.data_type(&schema).expect_err("error").strip_backtrace(); - assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) - } - - #[test] - fn out_of_bounds_nullable() { - let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); - let col = Column::new("id", 9); - let error = col.nullable(&schema).expect_err("error").strip_backtrace(); - assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) - } - - #[test] - fn out_of_bounds_evaluate() -> Result<()> { - let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); - let data: StringArray = vec!["data"].into(); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data)])?; - let col = Column::new("id", 9); - let error = col.evaluate(&batch).expect_err("error").strip_backtrace(); - assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)); - Ok(()) - } -} From b6e55d7e9cf17cfd1dcf633350cc6d205608ecd0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Jul 2024 09:51:40 -0600 Subject: [PATCH 112/357] feat: Optimize CASE expression for usage where then and else values are literals 
(#11553) * Optimize CASE expression for usage where then and else values are literals * add slt test * add more test cases --- .../physical-expr/src/expressions/case.rs | 44 ++++++++++++++ datafusion/sqllogictest/test_files/case.slt | 60 ++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 521a7ed9acae4..b428d562bd1b7 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -57,6 +57,11 @@ enum EvalMethod { /// /// CASE WHEN condition THEN column [ELSE NULL] END InfallibleExprOrNull, + /// This is a specialization for a specific use case where we can take a fast path + /// if there is just one when/then pair and both the `then` and `else` expressions + /// are literal values + /// CASE WHEN condition THEN literal ELSE literal END + ScalarOrScalar, } /// The CASE expression is similar to a series of nested if/else and there are two forms that @@ -140,6 +145,12 @@ impl CaseExpr { && else_expr.is_none() { EvalMethod::InfallibleExprOrNull + } else if when_then_expr.len() == 1 + && when_then_expr[0].1.as_any().is::() + && else_expr.is_some() + && else_expr.as_ref().unwrap().as_any().is::() + { + EvalMethod::ScalarOrScalar } else { EvalMethod::NoExpression }; @@ -344,6 +355,38 @@ impl CaseExpr { internal_err!("predicate did not evaluate to an array") } } + + fn scalar_or_scalar(&self, batch: &RecordBatch) -> Result { + let return_type = self.data_type(&batch.schema())?; + + // evaluate when expression + let when_value = self.when_then_expr[0].0.evaluate(batch)?; + let when_value = when_value.into_array(batch.num_rows())?; + let when_value = as_boolean_array(&when_value).map_err(|e| { + DataFusionError::Context( + "WHEN expression did not return a BooleanArray".to_string(), + Box::new(e), + ) + })?; + + // Treat 'NULL' as false value + let when_value = match when_value.null_count() { + 0 => Cow::Borrowed(when_value), + _ => Cow::Owned(prep_null_mask_filter(when_value)), + }; + + // evaluate then_value + let then_value = self.when_then_expr[0].1.evaluate(batch)?; + let then_value = Scalar::new(then_value.into_array(1)?); + + // keep `else_expr`'s data type and return type consistent + let e = self.else_expr.as_ref().unwrap(); + let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone()) + .unwrap_or_else(|_| Arc::clone(e)); + let else_ = Scalar::new(expr.evaluate(batch)?.into_array(1)?); + + Ok(ColumnarValue::Array(zip(&when_value, &then_value, &else_)?)) + } } impl PhysicalExpr for CaseExpr { @@ -406,6 +449,7 @@ impl PhysicalExpr for CaseExpr { // Specialization for CASE WHEN expr THEN column [ELSE NULL] END self.case_column_or_null(batch) } + EvalMethod::ScalarOrScalar => self.scalar_or_scalar(batch), } } diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index fac1042bb6dd7..70063b88fb191 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -17,7 +17,7 @@ # create test data statement ok -create table foo (a int, b int) as values (1, 2), (3, 4), (5, 6); +create table foo (a int, b int) as values (1, 2), (3, 4), (5, 6), (null, null), (6, null), (null, 7); # CASE WHEN with condition query T @@ -26,6 +26,9 @@ SELECT CASE a WHEN 1 THEN 'one' WHEN 3 THEN 'three' ELSE '?' END FROM foo one three ? +? +? +? 
# CASE WHEN with no condition query I @@ -34,6 +37,9 @@ SELECT CASE WHEN a > 2 THEN a ELSE b END FROM foo 2 3 5 +NULL +6 +7 # column or explicit null query I @@ -42,6 +48,9 @@ SELECT CASE WHEN a > 2 THEN b ELSE null END FROM foo NULL 4 6 +NULL +NULL +7 # column or implicit null query I @@ -50,3 +59,52 @@ SELECT CASE WHEN a > 2 THEN b END FROM foo NULL 4 6 +NULL +NULL +7 + +# scalar or scalar (string) +query T +SELECT CASE WHEN a > 2 THEN 'even' ELSE 'odd' END FROM foo +---- +odd +even +even +odd +even +odd + +# scalar or scalar (int) +query I +SELECT CASE WHEN a > 2 THEN 1 ELSE 0 END FROM foo +---- +0 +1 +1 +0 +1 +0 + +# predicate binary expression with scalars (does not make much sense because the expression in +# this case is always false, so this expression could be rewritten as a literal 0 during planning +query I +SELECT CASE WHEN 1 > 2 THEN 1 ELSE 0 END FROM foo +---- +0 +0 +0 +0 +0 +0 + +# predicate using boolean literal (does not make much sense because the expression in +# this case is always false, so this expression could be rewritten as a literal 0 during planning +query I +SELECT CASE WHEN false THEN 1 ELSE 0 END FROM foo +---- +0 +0 +0 +0 +0 +0 From 7d078d8c11155fd098595126b1ed60cad9afce5a Mon Sep 17 00:00:00 2001 From: Oleks V Date: Mon, 22 Jul 2024 11:10:53 -0700 Subject: [PATCH 113/357] Fix SortMergeJoin antijoin flaky condition (#11604) --- .../physical-plan/src/joins/sort_merge_join.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 5fde028c7f488..96d5ba728a30b 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1681,22 +1681,25 @@ fn get_filtered_join_mask( JoinType::LeftAnti => { // have we seen a filter match for a streaming index before for i in 0..streamed_indices_length { - if mask.value(i) && !seen_as_true { + let streamed_idx = streamed_indices.value(i); + if mask.value(i) + && !seen_as_true + && !matched_indices.contains(&streamed_idx) + { seen_as_true = true; - filter_matched_indices.push(streamed_indices.value(i)); + filter_matched_indices.push(streamed_idx); } // Reset `seen_as_true` flag and calculate mask for the current streaming index // - if within the batch it switched to next streaming index(e.g. 
from 0 to 1, or from 1 to 2) // - if it is at the end of the all buffered batches for the given streaming index, 0 index comes last if (i < streamed_indices_length - 1 - && streamed_indices.value(i) != streamed_indices.value(i + 1)) + && streamed_idx != streamed_indices.value(i + 1)) || (i == streamed_indices_length - 1 && *scanning_buffered_offset == 0) { corrected_mask.append_value( - !matched_indices.contains(&streamed_indices.value(i)) - && !seen_as_true, + !matched_indices.contains(&streamed_idx) && !seen_as_true, ); seen_as_true = false; } else { From a2ac00da1b3aa7879317ae88d1b356b27f49f887 Mon Sep 17 00:00:00 2001 From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:51:43 +0300 Subject: [PATCH 114/357] Improve Union Equivalence Propagation (#11506) * Initial commit * Fix formatting * Minor changes * Fix failing test * Change union calculation algorithm to make it symmetric * Minor changes * Add unit tests * Simplifications * Review Part 1 * Move test and union equivalence * Add new tests * Support for union with different schema * Address reviews * Review Part 2 * Add new tests * Final Review --------- Co-authored-by: Mehmet Ozan Kabak --- .../physical-expr-common/src/physical_expr.rs | 33 +- .../physical-expr/src/equivalence/mod.rs | 4 +- .../src/equivalence/properties.rs | 641 ++++++++++++++++-- datafusion/physical-expr/src/lib.rs | 2 +- datafusion/physical-plan/src/common.rs | 356 +--------- datafusion/physical-plan/src/union.rs | 115 +--- datafusion/sqllogictest/test_files/order.slt | 7 + 7 files changed, 647 insertions(+), 511 deletions(-) diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index 1998f14396465..c74fb9c2d1b79 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -20,13 +20,15 @@ use std::fmt::{Debug, Display}; use std::hash::{Hash, Hasher}; use std::sync::Arc; +use crate::expressions::column::Column; use crate::utils::scatter; use arrow::array::BooleanArray; use arrow::compute::filter_record_batch; -use arrow::datatypes::{DataType, Schema}; +use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, not_impl_err, Result}; +use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_common::{internal_err, not_impl_err, plan_err, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; use datafusion_expr::ColumnarValue; @@ -191,6 +193,33 @@ pub fn with_new_children_if_necessary( } } +/// Rewrites an expression according to new schema; i.e. changes the columns it +/// refers to with the column at corresponding index in the new schema. Returns +/// an error if the given schema has fewer columns than the original schema. +/// Note that the resulting expression may not be valid if data types in the +/// new schema is incompatible with expression nodes. +pub fn with_new_schema( + expr: Arc, + schema: &SchemaRef, +) -> Result> { + Ok(expr + .transform_up(|expr| { + if let Some(col) = expr.as_any().downcast_ref::() { + let idx = col.index(); + let Some(field) = schema.fields().get(idx) else { + return plan_err!( + "New schema has fewer columns than original schema" + ); + }; + let new_col = Column::new(field.name(), idx); + Ok(Transformed::yes(Arc::new(new_col) as _)) + } else { + Ok(Transformed::no(expr)) + } + })? 
+ .data) +} + pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { if any.is::>() { any.downcast_ref::>() diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 83f94057f740f..b9228282b081c 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -30,7 +30,9 @@ mod properties; pub use class::{ConstExpr, EquivalenceClass, EquivalenceGroup}; pub use ordering::OrderingEquivalenceClass; pub use projection::ProjectionMapping; -pub use properties::{join_equivalence_properties, EquivalenceProperties}; +pub use properties::{ + calculate_union, join_equivalence_properties, EquivalenceProperties, +}; /// This function constructs a duplicate-free `LexOrderingReq` by filtering out /// duplicate entries that have same physical expression inside. For example, diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 8c327fbaf4098..64c22064d4b79 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -21,7 +21,8 @@ use std::sync::Arc; use super::ordering::collapse_lex_ordering; use crate::equivalence::class::const_exprs_contains; use crate::equivalence::{ - collapse_lex_req, EquivalenceGroup, OrderingEquivalenceClass, ProjectionMapping, + collapse_lex_req, EquivalenceClass, EquivalenceGroup, OrderingEquivalenceClass, + ProjectionMapping, }; use crate::expressions::Literal; use crate::{ @@ -32,11 +33,12 @@ use crate::{ use arrow_schema::{SchemaRef, SortOptions}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{JoinSide, JoinType, Result}; +use datafusion_common::{plan_err, JoinSide, JoinType, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_physical_expr_common::expressions::column::Column; use datafusion_physical_expr_common::expressions::CastExpr; +use datafusion_physical_expr_common::physical_expr::with_new_schema; use datafusion_physical_expr_common::utils::ExprPropertiesNode; use indexmap::{IndexMap, IndexSet}; @@ -536,33 +538,6 @@ impl EquivalenceProperties { .then_some(if lhs.len() >= rhs.len() { lhs } else { rhs }) } - /// Calculates the "meet" of the given orderings (`lhs` and `rhs`). - /// The meet of a set of orderings is the finest ordering that is satisfied - /// by all the orderings in that set. For details, see: - /// - /// - /// - /// If there is no ordering that satisfies both `lhs` and `rhs`, returns - /// `None`. As an example, the meet of orderings `[a ASC]` and `[a ASC, b ASC]` - /// is `[a ASC]`. - pub fn get_meet_ordering( - &self, - lhs: LexOrderingRef, - rhs: LexOrderingRef, - ) -> Option { - let lhs = self.normalize_sort_exprs(lhs); - let rhs = self.normalize_sort_exprs(rhs); - let mut meet = vec![]; - for (lhs, rhs) in lhs.into_iter().zip(rhs.into_iter()) { - if lhs.eq(&rhs) { - meet.push(lhs); - } else { - break; - } - } - (!meet.is_empty()).then_some(meet) - } - /// we substitute the ordering according to input expression type, this is a simplified version /// In this case, we just substitute when the expression satisfy the following condition: /// I. 
just have one column and is a CAST expression @@ -1007,6 +982,74 @@ impl EquivalenceProperties { .map(|node| node.data) .unwrap_or(ExprProperties::new_unknown()) } + + /// Transforms this `EquivalenceProperties` into a new `EquivalenceProperties` + /// by mapping columns in the original schema to columns in the new schema + /// by index. + pub fn with_new_schema(self, schema: SchemaRef) -> Result { + // The new schema and the original schema is aligned when they have the + // same number of columns, and fields at the same index have the same + // type in both schemas. + let schemas_aligned = (self.schema.fields.len() == schema.fields.len()) + && self + .schema + .fields + .iter() + .zip(schema.fields.iter()) + .all(|(lhs, rhs)| lhs.data_type().eq(rhs.data_type())); + if !schemas_aligned { + // Rewriting equivalence properties in terms of new schema is not + // safe when schemas are not aligned: + return plan_err!( + "Cannot rewrite old_schema:{:?} with new schema: {:?}", + self.schema, + schema + ); + } + // Rewrite constants according to new schema: + let new_constants = self + .constants + .into_iter() + .map(|const_expr| { + let across_partitions = const_expr.across_partitions(); + let new_const_expr = with_new_schema(const_expr.owned_expr(), &schema)?; + Ok(ConstExpr::new(new_const_expr) + .with_across_partitions(across_partitions)) + }) + .collect::>>()?; + + // Rewrite orderings according to new schema: + let mut new_orderings = vec![]; + for ordering in self.oeq_class.orderings { + let new_ordering = ordering + .into_iter() + .map(|mut sort_expr| { + sort_expr.expr = with_new_schema(sort_expr.expr, &schema)?; + Ok(sort_expr) + }) + .collect::>()?; + new_orderings.push(new_ordering); + } + + // Rewrite equivalence classes according to the new schema: + let mut eq_classes = vec![]; + for eq_class in self.eq_group.classes { + let new_eq_exprs = eq_class + .into_vec() + .into_iter() + .map(|expr| with_new_schema(expr, &schema)) + .collect::>()?; + eq_classes.push(EquivalenceClass::new(new_eq_exprs)); + } + + // Construct the resulting equivalence properties: + let mut result = EquivalenceProperties::new(schema); + result.constants = new_constants; + result.add_new_orderings(new_orderings); + result.add_equivalence_group(EquivalenceGroup::new(eq_classes)); + + Ok(result) + } } /// Calculates the properties of a given [`ExprPropertiesNode`]. @@ -1484,6 +1527,84 @@ impl Hash for ExprWrapper { } } +/// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties` +/// of `lhs` and `rhs` according to the schema of `lhs`. +fn calculate_union_binary( + lhs: EquivalenceProperties, + mut rhs: EquivalenceProperties, +) -> Result { + // TODO: In some cases, we should be able to preserve some equivalence + // classes. Add support for such cases. + + // Harmonize the schema of the rhs with the schema of the lhs (which is the accumulator schema): + if !rhs.schema.eq(&lhs.schema) { + rhs = rhs.with_new_schema(Arc::clone(&lhs.schema))?; + } + + // First, calculate valid constants for the union. A quantity is constant + // after the union if it is constant in both sides. + let constants = lhs + .constants() + .iter() + .filter(|const_expr| const_exprs_contains(rhs.constants(), const_expr.expr())) + .map(|const_expr| { + // TODO: When both sides' constants are valid across partitions, + // the union's constant should also be valid if values are + // the same. However, we do not have the capability to + // check this yet. 
+ ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false) + }) + .collect(); + + // Next, calculate valid orderings for the union by searching for prefixes + // in both sides. + let mut orderings = vec![]; + for mut ordering in lhs.normalized_oeq_class().orderings { + // Progressively shorten the ordering to search for a satisfied prefix: + while !rhs.ordering_satisfy(&ordering) { + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } + for mut ordering in rhs.normalized_oeq_class().orderings { + // Progressively shorten the ordering to search for a satisfied prefix: + while !lhs.ordering_satisfy(&ordering) { + ordering.pop(); + } + // There is a non-trivial satisfied prefix, add it as a valid ordering: + if !ordering.is_empty() { + orderings.push(ordering); + } + } + let mut eq_properties = EquivalenceProperties::new(lhs.schema); + eq_properties.constants = constants; + eq_properties.add_new_orderings(orderings); + Ok(eq_properties) +} + +/// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties` +/// of the given `EquivalenceProperties` in `eqps` according to the given +/// output `schema` (which need not be the same with those of `lhs` and `rhs` +/// as details such as nullability may be different). +pub fn calculate_union( + eqps: Vec, + schema: SchemaRef, +) -> Result { + // TODO: In some cases, we should be able to preserve some equivalence + // classes. Add support for such cases. + let mut init = eqps[0].clone(); + // Harmonize the schema of the init with the schema of the union: + if !init.schema.eq(&schema) { + init = init.with_new_schema(schema)?; + } + eqps.into_iter() + .skip(1) + .try_fold(init, calculate_union_binary) +} + #[cfg(test)] mod tests { use std::ops::Not; @@ -2188,50 +2309,6 @@ mod tests { Ok(()) } - #[test] - fn test_get_meet_ordering() -> Result<()> { - let schema = create_test_schema()?; - let col_a = &col("a", &schema)?; - let col_b = &col("b", &schema)?; - let eq_properties = EquivalenceProperties::new(schema); - let option_asc = SortOptions { - descending: false, - nulls_first: false, - }; - let option_desc = SortOptions { - descending: true, - nulls_first: true, - }; - let tests_cases = vec![ - // Get meet ordering between [a ASC] and [a ASC, b ASC] - // result should be [a ASC] - ( - vec![(col_a, option_asc)], - vec![(col_a, option_asc), (col_b, option_asc)], - Some(vec![(col_a, option_asc)]), - ), - // Get meet ordering between [a ASC] and [a DESC] - // result should be None. - (vec![(col_a, option_asc)], vec![(col_a, option_desc)], None), - // Get meet ordering between [a ASC, b ASC] and [a ASC, b DESC] - // result should be [a ASC]. 
- ( - vec![(col_a, option_asc), (col_b, option_asc)], - vec![(col_a, option_asc), (col_b, option_desc)], - Some(vec![(col_a, option_asc)]), - ), - ]; - for (lhs, rhs, expected) in tests_cases { - let lhs = convert_to_sort_exprs(&lhs); - let rhs = convert_to_sort_exprs(&rhs); - let expected = expected.map(|expected| convert_to_sort_exprs(&expected)); - let finer = eq_properties.get_meet_ordering(&lhs, &rhs); - assert_eq!(finer, expected) - } - - Ok(()) - } - #[test] fn test_get_finer() -> Result<()> { let schema = create_test_schema()?; @@ -2525,4 +2602,422 @@ mod tests { Ok(()) } + + fn append_fields(schema: &SchemaRef, text: &str) -> SchemaRef { + Arc::new(Schema::new( + schema + .fields() + .iter() + .map(|field| { + Field::new( + // Annotate name with `text`: + format!("{}{}", field.name(), text), + field.data_type().clone(), + field.is_nullable(), + ) + }) + .collect::>(), + )) + } + + #[tokio::test] + async fn test_union_equivalence_properties_multi_children() -> Result<()> { + let schema = create_test_schema()?; + let schema2 = append_fields(&schema, "1"); + let schema3 = append_fields(&schema, "2"); + let test_cases = vec![ + // --------- TEST CASE 1 ---------- + ( + vec![ + // Children 1 + ( + // Orderings + vec![vec!["a", "b", "c"]], + Arc::clone(&schema), + ), + // Children 2 + ( + // Orderings + vec![vec!["a1", "b1", "c1"]], + Arc::clone(&schema2), + ), + // Children 3 + ( + // Orderings + vec![vec!["a2", "b2"]], + Arc::clone(&schema3), + ), + ], + // Expected + vec![vec!["a", "b"]], + ), + // --------- TEST CASE 2 ---------- + ( + vec![ + // Children 1 + ( + // Orderings + vec![vec!["a", "b", "c"]], + Arc::clone(&schema), + ), + // Children 2 + ( + // Orderings + vec![vec!["a1", "b1", "c1"]], + Arc::clone(&schema2), + ), + // Children 3 + ( + // Orderings + vec![vec!["a2", "b2", "c2"]], + Arc::clone(&schema3), + ), + ], + // Expected + vec![vec!["a", "b", "c"]], + ), + // --------- TEST CASE 3 ---------- + ( + vec![ + // Children 1 + ( + // Orderings + vec![vec!["a", "b"]], + Arc::clone(&schema), + ), + // Children 2 + ( + // Orderings + vec![vec!["a1", "b1", "c1"]], + Arc::clone(&schema2), + ), + // Children 3 + ( + // Orderings + vec![vec!["a2", "b2", "c2"]], + Arc::clone(&schema3), + ), + ], + // Expected + vec![vec!["a", "b"]], + ), + // --------- TEST CASE 4 ---------- + ( + vec![ + // Children 1 + ( + // Orderings + vec![vec!["a", "b"]], + Arc::clone(&schema), + ), + // Children 2 + ( + // Orderings + vec![vec!["a1", "b1"]], + Arc::clone(&schema2), + ), + // Children 3 + ( + // Orderings + vec![vec!["b2", "c2"]], + Arc::clone(&schema3), + ), + ], + // Expected + vec![], + ), + // --------- TEST CASE 5 ---------- + ( + vec![ + // Children 1 + ( + // Orderings + vec![vec!["a", "b"], vec!["c"]], + Arc::clone(&schema), + ), + // Children 2 + ( + // Orderings + vec![vec!["a1", "b1"], vec!["c1"]], + Arc::clone(&schema2), + ), + ], + // Expected + vec![vec!["a", "b"], vec!["c"]], + ), + ]; + for (children, expected) in test_cases { + let children_eqs = children + .iter() + .map(|(orderings, schema)| { + let orderings = orderings + .iter() + .map(|ordering| { + ordering + .iter() + .map(|name| PhysicalSortExpr { + expr: col(name, schema).unwrap(), + options: SortOptions::default(), + }) + .collect::>() + }) + .collect::>(); + EquivalenceProperties::new_with_orderings( + Arc::clone(schema), + &orderings, + ) + }) + .collect::>(); + let actual = calculate_union(children_eqs, Arc::clone(&schema))?; + + let expected_ordering = expected + .into_iter() + .map(|ordering| { + ordering + 
.into_iter() + .map(|name| PhysicalSortExpr { + expr: col(name, &schema).unwrap(), + options: SortOptions::default(), + }) + .collect::>() + }) + .collect::>(); + let expected = EquivalenceProperties::new_with_orderings( + Arc::clone(&schema), + &expected_ordering, + ); + assert_eq_properties_same( + &actual, + &expected, + format!("expected: {:?}, actual: {:?}", expected, actual), + ); + } + Ok(()) + } + + #[tokio::test] + async fn test_union_equivalence_properties_binary() -> Result<()> { + let schema = create_test_schema()?; + let schema2 = append_fields(&schema, "1"); + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + let col_a1 = &col("a1", &schema2)?; + let col_b1 = &col("b1", &schema2)?; + let options = SortOptions::default(); + let options_desc = !SortOptions::default(); + let test_cases = [ + //-----------TEST CASE 1----------// + ( + ( + // First child orderings + vec![ + // [a ASC] + (vec![(col_a, options)]), + ], + // First child constants + vec![col_b, col_c], + Arc::clone(&schema), + ), + ( + // Second child orderings + vec![ + // [b ASC] + (vec![(col_b, options)]), + ], + // Second child constants + vec![col_a, col_c], + Arc::clone(&schema), + ), + ( + // Union expected orderings + vec![ + // [a ASC] + vec![(col_a, options)], + // [b ASC] + vec![(col_b, options)], + ], + // Union + vec![col_c], + ), + ), + //-----------TEST CASE 2----------// + // Meet ordering between [a ASC], [a ASC, b ASC] should be [a ASC] + ( + ( + // First child orderings + vec![ + // [a ASC] + vec![(col_a, options)], + ], + // No constant + vec![], + Arc::clone(&schema), + ), + ( + // Second child orderings + vec![ + // [a ASC, b ASC] + vec![(col_a, options), (col_b, options)], + ], + // No constant + vec![], + Arc::clone(&schema), + ), + ( + // Union orderings + vec![ + // [a ASC] + vec![(col_a, options)], + ], + // No constant + vec![], + ), + ), + //-----------TEST CASE 3----------// + // Meet ordering between [a ASC], [a DESC] should be [] + ( + ( + // First child orderings + vec![ + // [a ASC] + vec![(col_a, options)], + ], + // No constant + vec![], + Arc::clone(&schema), + ), + ( + // Second child orderings + vec![ + // [a DESC] + vec![(col_a, options_desc)], + ], + // No constant + vec![], + Arc::clone(&schema), + ), + ( + // Union doesn't have any ordering + vec![], + // No constant + vec![], + ), + ), + //-----------TEST CASE 4----------// + // Meet ordering between [a ASC], [a1 ASC, b1 ASC] should be [a ASC] + // Where a, and a1 ath the same index for their corresponding schemas. 
+ ( + ( + // First child orderings + vec![ + // [a ASC] + vec![(col_a, options)], + ], + // No constant + vec![], + Arc::clone(&schema), + ), + ( + // Second child orderings + vec![ + // [a1 ASC, b1 ASC] + vec![(col_a1, options), (col_b1, options)], + ], + // No constant + vec![], + Arc::clone(&schema2), + ), + ( + // Union orderings + vec![ + // [a ASC] + vec![(col_a, options)], + ], + // No constant + vec![], + ), + ), + ]; + + for ( + test_idx, + ( + (first_child_orderings, first_child_constants, first_schema), + (second_child_orderings, second_child_constants, second_schema), + (union_orderings, union_constants), + ), + ) in test_cases.iter().enumerate() + { + let first_orderings = first_child_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let first_constants = first_child_constants + .iter() + .map(|expr| ConstExpr::new(Arc::clone(expr))) + .collect::>(); + let mut lhs = EquivalenceProperties::new(Arc::clone(first_schema)); + lhs = lhs.add_constants(first_constants); + lhs.add_new_orderings(first_orderings); + + let second_orderings = second_child_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let second_constants = second_child_constants + .iter() + .map(|expr| ConstExpr::new(Arc::clone(expr))) + .collect::>(); + let mut rhs = EquivalenceProperties::new(Arc::clone(second_schema)); + rhs = rhs.add_constants(second_constants); + rhs.add_new_orderings(second_orderings); + + let union_expected_orderings = union_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let union_constants = union_constants + .iter() + .map(|expr| ConstExpr::new(Arc::clone(expr))) + .collect::>(); + let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema)); + union_expected_eq = union_expected_eq.add_constants(union_constants); + union_expected_eq.add_new_orderings(union_expected_orderings); + + let actual_union_eq = calculate_union_binary(lhs, rhs)?; + let err_msg = format!( + "Error in test id: {:?}, test case: {:?}", + test_idx, test_cases[test_idx] + ); + assert_eq_properties_same(&actual_union_eq, &union_expected_eq, err_msg); + } + Ok(()) + } + + fn assert_eq_properties_same( + lhs: &EquivalenceProperties, + rhs: &EquivalenceProperties, + err_msg: String, + ) { + // Check whether constants are same + let lhs_constants = lhs.constants(); + let rhs_constants = rhs.constants(); + assert_eq!(lhs_constants.len(), rhs_constants.len(), "{}", err_msg); + for rhs_constant in rhs_constants { + assert!( + const_exprs_contains(lhs_constants, rhs_constant.expr()), + "{}", + err_msg + ); + } + + // Check whether orderings are same. 
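Outside the test harness, the same computation is reachable through the newly exported `calculate_union`. A minimal sketch under assumed imports (the function name `union_ordering_sketch` and the two-column schema are illustrative, not part of this patch); it mirrors TEST CASE 2 above, where the union of `[a ASC]` and `[a ASC, b ASC]` keeps only the common prefix `[a ASC]`:

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema, SortOptions};
use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr::{calculate_union, EquivalenceProperties, PhysicalSortExpr};

fn union_ordering_sketch() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("a", DataType::Int32, true),
        Field::new("b", DataType::Int32, true),
    ]));
    // Ascending with the default `SortOptions`, as in the tests above.
    let sort = |name: &str| -> Result<PhysicalSortExpr> {
        Ok(PhysicalSortExpr {
            expr: col(name, &schema)?,
            options: SortOptions::default(),
        })
    };

    // First child is ordered by [a ASC], second child by [a ASC, b ASC].
    let child1 = EquivalenceProperties::new_with_orderings(
        Arc::clone(&schema),
        &[vec![sort("a")?]],
    );
    let child2 = EquivalenceProperties::new_with_orderings(
        Arc::clone(&schema),
        &[vec![sort("a")?, sort("b")?]],
    );

    // The union can only promise the common prefix of the two orderings.
    let union_eq = calculate_union(vec![child1, child2], Arc::clone(&schema))?;
    let expected = vec![sort("a")?];
    assert!(union_eq.oeq_class().contains(&expected));
    Ok(())
}
```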
+ let lhs_orderings = lhs.oeq_class(); + let rhs_orderings = &rhs.oeq_class.orderings; + assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg); + for rhs_ordering in rhs_orderings { + assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg); + } + } } diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index 4f83ae01959ba..2e78119eba468 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -48,7 +48,7 @@ pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; pub use datafusion_physical_expr_common::aggregate::{ AggregateExpr, AggregatePhysicalExpressions, }; -pub use equivalence::{ConstExpr, EquivalenceProperties}; +pub use equivalence::{calculate_union, ConstExpr, EquivalenceProperties}; pub use partitioning::{Distribution, Partitioning}; pub use physical_expr::{ physical_exprs_bag_equal, physical_exprs_contains, physical_exprs_equal, diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index bf9d14e73dd87..4b5eea6b760df 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -22,9 +22,9 @@ use std::fs::{metadata, File}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use super::{ExecutionPlanProperties, SendableRecordBatchStream}; +use super::SendableRecordBatchStream; use crate::stream::RecordBatchReceiverStream; -use crate::{ColumnStatistics, ExecutionPlan, Statistics}; +use crate::{ColumnStatistics, Statistics}; use arrow::datatypes::Schema; use arrow::ipc::writer::{FileWriter, IpcWriteOptions}; @@ -33,8 +33,6 @@ use arrow_array::Array; use datafusion_common::stats::Precision; use datafusion_common::{plan_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryReservation; -use datafusion_physical_expr::expressions::{BinaryExpr, Column}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use futures::{StreamExt, TryStreamExt}; use parking_lot::Mutex; @@ -178,71 +176,6 @@ pub fn compute_record_batch_statistics( } } -/// Calculates the "meet" of given orderings. -/// The meet is the finest ordering that satisfied by all the given -/// orderings, see . -pub fn get_meet_of_orderings( - given: &[Arc], -) -> Option<&[PhysicalSortExpr]> { - given - .iter() - .map(|item| item.output_ordering()) - .collect::>>() - .and_then(get_meet_of_orderings_helper) -} - -fn get_meet_of_orderings_helper( - orderings: Vec<&[PhysicalSortExpr]>, -) -> Option<&[PhysicalSortExpr]> { - let mut idx = 0; - let first = orderings[0]; - loop { - for ordering in orderings.iter() { - if idx >= ordering.len() { - return Some(ordering); - } else { - let schema_aligned = check_expr_alignment( - ordering[idx].expr.as_ref(), - first[idx].expr.as_ref(), - ); - if !schema_aligned || (ordering[idx].options != first[idx].options) { - // In a union, the output schema is that of the first child (by convention). 
- // Therefore, generate the result from the first child's schema: - return if idx > 0 { Some(&first[..idx]) } else { None }; - } - } - } - idx += 1; - } - - fn check_expr_alignment(first: &dyn PhysicalExpr, second: &dyn PhysicalExpr) -> bool { - match ( - first.as_any().downcast_ref::(), - second.as_any().downcast_ref::(), - first.as_any().downcast_ref::(), - second.as_any().downcast_ref::(), - ) { - (Some(first_col), Some(second_col), _, _) => { - first_col.index() == second_col.index() - } - (_, _, Some(first_binary), Some(second_binary)) => { - if first_binary.op() == second_binary.op() { - check_expr_alignment( - first_binary.left().as_ref(), - second_binary.left().as_ref(), - ) && check_expr_alignment( - first_binary.right().as_ref(), - second_binary.right().as_ref(), - ) - } else { - false - } - } - (_, _, _, _) => false, - } - } -} - /// Write in Arrow IPC format. pub struct IPCWriter { /// path @@ -342,297 +275,12 @@ pub fn can_project( #[cfg(test)] mod tests { - use std::ops::Not; - use super::*; - use crate::memory::MemoryExec; - use crate::sorts::sort::SortExec; - use crate::union::UnionExec; - use arrow::compute::SortOptions; use arrow::{ array::{Float32Array, Float64Array, UInt64Array}, datatypes::{DataType, Field}, }; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::col; - - #[test] - fn get_meet_of_orderings_helper_common_prefix_test() -> Result<()> { - let input1: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: SortOptions::default(), - }, - ]; - - let input2: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("x", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("y", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("z", 2)), - options: SortOptions::default(), - }, - ]; - - let input3: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("d", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("e", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("f", 2)), - options: SortOptions::default(), - }, - ]; - - let input4: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("g", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("h", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - // Note that index of this column is not 2. Hence this 3rd entry shouldn't be - // in the output ordering. 
- expr: Arc::new(Column::new("i", 3)), - options: SortOptions::default(), - }, - ]; - - let expected = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: SortOptions::default(), - }, - ]; - let result = get_meet_of_orderings_helper(vec![&input1, &input2, &input3]); - assert_eq!(result.unwrap(), expected); - - let expected = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - ]; - let result = get_meet_of_orderings_helper(vec![&input1, &input2, &input4]); - assert_eq!(result.unwrap(), expected); - Ok(()) - } - - #[test] - fn get_meet_of_orderings_helper_subset_test() -> Result<()> { - let input1: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - ]; - - let input2: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("d", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("e", 2)), - options: SortOptions::default(), - }, - ]; - - let input3: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("f", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("g", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("h", 2)), - options: SortOptions::default(), - }, - ]; - - let result = get_meet_of_orderings_helper(vec![&input1, &input2, &input3]); - assert_eq!(result.unwrap(), input1); - Ok(()) - } - - #[test] - fn get_meet_of_orderings_helper_no_overlap_test() -> Result<()> { - let input1: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - // Since ordering is conflicting with other inputs - // output ordering should be empty - options: SortOptions::default().not(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - ]; - - let input2: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("x", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 1)), - options: SortOptions::default(), - }, - ]; - - let input3: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 2)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("y", 1)), - options: SortOptions::default(), - }, - ]; - - let result = get_meet_of_orderings_helper(vec![&input1, &input2]); - assert!(result.is_none()); - - let result = get_meet_of_orderings_helper(vec![&input2, &input3]); - assert!(result.is_none()); - - let result = get_meet_of_orderings_helper(vec![&input1, &input3]); - assert!(result.is_none()); - Ok(()) - } - - #[test] - fn get_meet_of_orderings_helper_binary_exprs() -> Result<()> { - let input1: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Plus, - Arc::new(Column::new("b", 1)), - )), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: 
SortOptions::default(), - }, - ]; - - let input2: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("x", 0)), - Operator::Plus, - Arc::new(Column::new("y", 1)), - )), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("z", 2)), - options: SortOptions::default(), - }, - ]; - - // erroneous input - let input3: Vec = vec![ - PhysicalSortExpr { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 1)), - Operator::Plus, - Arc::new(Column::new("b", 0)), - )), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: SortOptions::default(), - }, - ]; - - let result = get_meet_of_orderings_helper(vec![&input1, &input2]); - assert_eq!(input1, result.unwrap()); - - let result = get_meet_of_orderings_helper(vec![&input2, &input3]); - assert!(result.is_none()); - - let result = get_meet_of_orderings_helper(vec![&input1, &input3]); - assert!(result.is_none()); - Ok(()) - } - - #[test] - fn test_meet_of_orderings() -> Result<()> { - let schema = Arc::new(Schema::new(vec![ - Field::new("f32", DataType::Float32, false), - Field::new("f64", DataType::Float64, false), - ])); - let sort_expr = vec![PhysicalSortExpr { - expr: col("f32", &schema).unwrap(), - options: SortOptions::default(), - }]; - let memory_exec = - Arc::new(MemoryExec::try_new(&[], Arc::clone(&schema), None)?) as _; - let sort_exec = Arc::new(SortExec::new(sort_expr.clone(), memory_exec)) - as Arc; - let memory_exec2 = Arc::new(MemoryExec::try_new(&[], schema, None)?) as _; - // memory_exec2 doesn't have output ordering - let union_exec = UnionExec::new(vec![Arc::clone(&sort_exec), memory_exec2]); - let res = get_meet_of_orderings(union_exec.inputs()); - assert!(res.is_none()); - - let union_exec = UnionExec::new(vec![Arc::clone(&sort_exec), sort_exec]); - let res = get_meet_of_orderings(union_exec.inputs()); - assert_eq!(res, Some(&sort_expr[..])); - Ok(()) - } #[test] fn test_compute_record_batch_statistics_empty() -> Result<()> { diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 24c80048ab4aa..9321fdb2cadf8 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -41,7 +41,7 @@ use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; use datafusion_common::{exec_err, internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; +use datafusion_physical_expr::{calculate_union, EquivalenceProperties}; use futures::Stream; use itertools::Itertools; @@ -99,7 +99,12 @@ impl UnionExec { /// Create a new UnionExec pub fn new(inputs: Vec>) -> Self { let schema = union_schema(&inputs); - let cache = Self::compute_properties(&inputs, schema); + // The schema of the inputs and the union schema is consistent when: + // - They have the same number of fields, and + // - Their fields have same types at the same indices. + // Here, we know that schemas are consistent and the call below can + // not return an error. 
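To make the consistency requirement above concrete: the inputs only need to agree on arity and on the data type at each index; field names may differ (as with the `schema`/`schema2` fixtures in the equivalence tests). A hypothetical checker, sketched for illustration only and not part of this change:

```rust
use arrow::datatypes::SchemaRef;

/// Illustration only: "consistent" in the sense described above means the same
/// number of fields and the same data type at every index.
fn schemas_consistent(lhs: &SchemaRef, rhs: &SchemaRef) -> bool {
    lhs.fields().len() == rhs.fields().len()
        && lhs
            .fields()
            .iter()
            .zip(rhs.fields().iter())
            .all(|(l, r)| l.data_type() == r.data_type())
}
```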
+ let cache = Self::compute_properties(&inputs, schema).unwrap(); UnionExec { inputs, metrics: ExecutionPlanMetricsSet::new(), @@ -116,13 +121,13 @@ impl UnionExec { fn compute_properties( inputs: &[Arc], schema: SchemaRef, - ) -> PlanProperties { + ) -> Result { // Calculate equivalence properties: - let children_eqs = inputs + let children_eqps = inputs .iter() - .map(|child| child.equivalence_properties()) + .map(|child| child.equivalence_properties().clone()) .collect::>(); - let eq_properties = calculate_union_eq_properties(&children_eqs, schema); + let eq_properties = calculate_union(children_eqps, schema)?; // Calculate output partitioning; i.e. sum output partitions of the inputs. let num_partitions = inputs @@ -134,71 +139,13 @@ impl UnionExec { // Determine execution mode: let mode = execution_mode_from_children(inputs.iter()); - PlanProperties::new(eq_properties, output_partitioning, mode) + Ok(PlanProperties::new( + eq_properties, + output_partitioning, + mode, + )) } } -/// Calculate `EquivalenceProperties` for `UnionExec` from the `EquivalenceProperties` -/// of its children. -fn calculate_union_eq_properties( - children_eqs: &[&EquivalenceProperties], - schema: SchemaRef, -) -> EquivalenceProperties { - // Calculate equivalence properties: - // TODO: In some cases, we should be able to preserve some equivalence - // classes and constants. Add support for such cases. - let mut eq_properties = EquivalenceProperties::new(schema); - // Use the ordering equivalence class of the first child as the seed: - let mut meets = children_eqs[0] - .oeq_class() - .iter() - .map(|item| item.to_vec()) - .collect::>(); - // Iterate over all the children: - for child_eqs in &children_eqs[1..] { - // Compute meet orderings of the current meets and the new ordering - // equivalence class. - let mut idx = 0; - while idx < meets.len() { - // Find all the meets of `current_meet` with this child's orderings: - let valid_meets = child_eqs.oeq_class().iter().filter_map(|ordering| { - child_eqs.get_meet_ordering(ordering, &meets[idx]) - }); - // Use the longest of these meets as others are redundant: - if let Some(next_meet) = valid_meets.max_by_key(|m| m.len()) { - meets[idx] = next_meet; - idx += 1; - } else { - meets.swap_remove(idx); - } - } - } - // We know have all the valid orderings after union, remove redundant - // entries (implicitly) and return: - eq_properties.add_new_orderings(meets); - - let mut meet_constants = children_eqs[0].constants().to_vec(); - // Iterate over all the children: - for child_eqs in &children_eqs[1..] 
{ - let constants = child_eqs.constants(); - meet_constants = meet_constants - .into_iter() - .filter_map(|meet_constant| { - for const_expr in constants { - if const_expr.expr().eq(meet_constant.expr()) { - // TODO: Check whether constant expressions evaluates the same value or not for each partition - let across_partitions = false; - return Some( - ConstExpr::from(meet_constant.owned_expr()) - .with_across_partitions(across_partitions), - ); - } - } - None - }) - .collect::>(); - } - eq_properties.add_constants(meet_constants) -} impl DisplayAs for UnionExec { fn fmt_as( @@ -639,8 +586,8 @@ mod tests { use arrow_schema::{DataType, SortOptions}; use datafusion_common::ScalarValue; - use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; + use datafusion_physical_expr_common::expressions::column::col; // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) fn create_test_schema() -> Result { @@ -856,23 +803,31 @@ mod tests { .with_sort_information(second_orderings), ); + let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema)); + union_expected_eq.add_new_orderings(union_expected_orderings); + let union = UnionExec::new(vec![child1, child2]); let union_eq_properties = union.properties().equivalence_properties(); - let union_actual_orderings = union_eq_properties.oeq_class(); let err_msg = format!( "Error in test id: {:?}, test case: {:?}", test_idx, test_cases[test_idx] ); - assert_eq!( - union_actual_orderings.len(), - union_expected_orderings.len(), - "{}", - err_msg - ); - for expected in &union_expected_orderings { - assert!(union_actual_orderings.contains(expected), "{}", err_msg); - } + assert_eq_properties_same(union_eq_properties, &union_expected_eq, err_msg); } Ok(()) } + + fn assert_eq_properties_same( + lhs: &EquivalenceProperties, + rhs: &EquivalenceProperties, + err_msg: String, + ) { + // Check whether orderings are same. 
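At the plan level, the effect of `calculate_union` can be observed directly on `UnionExec`. A rough sketch using public paths from `datafusion-physical-plan` (the function name and the single-column schema are illustrative):

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema, SortOptions};
use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr::PhysicalSortExpr;
use datafusion_physical_plan::memory::MemoryExec;
use datafusion_physical_plan::union::UnionExec;
use datafusion_physical_plan::ExecutionPlan;

fn union_exec_ordering_sketch() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
    let ordering = vec![PhysicalSortExpr {
        expr: col("a", &schema)?,
        options: SortOptions::default(),
    }];

    // Two empty in-memory children, both declared to be sorted by [a ASC].
    let child1 = Arc::new(
        MemoryExec::try_new(&[], Arc::clone(&schema), None)?
            .with_sort_information(vec![ordering.clone()]),
    ) as Arc<dyn ExecutionPlan>;
    let child2 = Arc::new(
        MemoryExec::try_new(&[], Arc::clone(&schema), None)?
            .with_sort_information(vec![ordering.clone()]),
    ) as Arc<dyn ExecutionPlan>;

    // The union advertises the shared [a ASC] ordering in its plan properties.
    let union = UnionExec::new(vec![child1, child2]);
    let eq = union.properties().equivalence_properties();
    assert!(eq.oeq_class().contains(&ordering));
    Ok(())
}
```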
+ let lhs_orderings = lhs.oeq_class(); + let rhs_orderings = &rhs.oeq_class.orderings; + assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg); + for rhs_ordering in rhs_orderings { + assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg); + } + } } diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 51de40fb19723..1aeaf9b76d48a 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -1132,3 +1132,10 @@ physical_plan 02)--ProjectionExec: expr=[CAST(inc_col@0 > desc_col@1 AS Int32) as c] 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[inc_col, desc_col], output_orderings=[[inc_col@0 ASC NULLS LAST], [desc_col@1 DESC]], has_header=true + +# Union a query with the actual data and one with a constant +query I +SELECT (SELECT c from ordered_table ORDER BY c LIMIT 1) UNION ALL (SELECT 23 as c from ordered_table ORDER BY c LIMIT 1) ORDER BY c; +---- +0 +23 From d941dc3f0196e85520c6aa923edbcefcc8c7b265 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 23 Jul 2024 08:02:11 +0800 Subject: [PATCH 115/357] Migrate `OrderSensitiveArrayAgg` to be a user defined aggregate (#11564) * first draft Signed-off-by: jayzhan211 * rm old agg Signed-off-by: jayzhan211 * replace udaf with interal function - create aggregate with dfschema Signed-off-by: jayzhan211 * rm test Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * rm useless Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * rename Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- .../aggregate_statistics.rs | 1 + .../combine_partial_final_agg.rs | 2 + datafusion/core/src/physical_planner.rs | 35 +- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 1 + datafusion/expr/src/function.rs | 8 +- .../src/approx_percentile_cont.rs | 9 +- .../functions-aggregate/src/array_agg.rs | 423 +++++++++++++- .../functions-aggregate/src/first_last.rs | 11 +- .../functions-aggregate/src/nth_value.rs | 7 +- datafusion/functions-aggregate/src/stddev.rs | 6 + .../physical-expr-common/src/aggregate/mod.rs | 95 +++- .../physical-expr-common/src/sort_expr.rs | 13 +- datafusion/physical-expr-common/src/utils.rs | 23 +- .../src/aggregate/array_agg_ordered.rs | 520 ------------------ .../physical-expr/src/aggregate/build_in.rs | 24 +- datafusion/physical-expr/src/aggregate/mod.rs | 5 +- .../physical-expr/src/expressions/mod.rs | 1 - .../physical-plan/src/aggregates/mod.rs | 125 ++++- datafusion/physical-plan/src/lib.rs | 2 +- datafusion/physical-plan/src/windows/mod.rs | 1 + datafusion/proto/src/physical_plan/mod.rs | 2 +- .../proto/src/physical_plan/to_proto.rs | 10 +- .../tests/cases/roundtrip_physical_plan.rs | 9 + 23 files changed, 681 insertions(+), 652 deletions(-) delete mode 100644 datafusion/physical-expr/src/aggregate/array_agg_ordered.rs diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index e412d814239d1..e7580d3e33ef2 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -429,6 +429,7 @@ pub(crate) mod tests { self.column_name(), false, false, + false, ) .unwrap() } diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs 
b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 29148a594f31c..ddb7d36fb5950 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -288,6 +288,7 @@ mod tests { name, false, false, + false, ) .unwrap() } @@ -378,6 +379,7 @@ mod tests { "Sum(b)", false, false, + false, )?]; let groups: Vec<(Arc, String)> = vec![(col("c", &schema)?, "c".to_string())]; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 97533cd5276a5..329d343f13fc1 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1839,34 +1839,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; - // TODO: Remove this after array_agg are all udafs let (agg_expr, filter, order_by) = match func_def { - AggregateFunctionDefinition::UDF(udf) - if udf.name() == "ARRAY_AGG" && order_by.is_some() => - { - // not yet support UDAF, fallback to builtin - let physical_sort_exprs = match order_by { - Some(exprs) => Some(create_physical_sort_exprs( - exprs, - logical_input_schema, - execution_props, - )?), - None => None, - }; - let ordering_reqs: Vec = - physical_sort_exprs.clone().unwrap_or(vec![]); - let fun = aggregates::AggregateFunction::ArrayAgg; - let agg_expr = aggregates::create_aggregate_expr( - &fun, - *distinct, - &physical_args, - &ordering_reqs, - physical_input_schema, - name, - ignore_nulls, - )?; - (agg_expr, filter, physical_sort_exprs) - } AggregateFunctionDefinition::BuiltIn(fun) => { let physical_sort_exprs = match order_by { Some(exprs) => Some(create_physical_sort_exprs( @@ -1899,19 +1872,23 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( )?), None => None, }; + let ordering_reqs: Vec = physical_sort_exprs.clone().unwrap_or(vec![]); - let agg_expr = udaf::create_aggregate_expr( + + let agg_expr = udaf::create_aggregate_expr_with_dfschema( fun, &physical_args, args, &sort_exprs, &ordering_reqs, - physical_input_schema, + logical_input_schema, name, ignore_nulls, *distinct, + false, )?; + (agg_expr, filter, physical_sort_exprs) } }; diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index a04f4f3491225..736560da97db9 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -113,6 +113,7 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str "sum1", false, false, + false, ) .unwrap()]; let expr = group_by_columns diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 73ab51494de6d..d722e55de487c 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -20,7 +20,7 @@ use crate::ColumnarValue; use crate::{Accumulator, Expr, PartitionEvaluator}; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion_common::Result; +use datafusion_common::{DFSchema, Result}; use std::sync::Arc; #[derive(Debug, Clone, Copy)] @@ -57,6 +57,9 @@ pub struct AccumulatorArgs<'a> { /// The schema of the input arguments pub schema: &'a Schema, + /// The schema of the input arguments + pub dfschema: &'a DFSchema, + /// Whether to ignore nulls. 
/// /// SQL allows the user to specify `IGNORE NULLS`, for example: @@ -78,6 +81,9 @@ pub struct AccumulatorArgs<'a> { /// If no `ORDER BY` is specified, `sort_exprs`` will be empty. pub sort_exprs: &'a [Expr], + /// Whether the aggregation is running in reverse order + pub is_reversed: bool, + /// The name of the aggregate expression pub name: &'a str, diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index bbe7d21e24866..dfb94a84cbecc 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -30,7 +30,8 @@ use arrow::{ use arrow_schema::{Field, Schema}; use datafusion_common::{ - downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, ScalarValue, + downcast_value, internal_err, not_impl_err, plan_err, DFSchema, DataFusionError, + ScalarValue, }; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; @@ -42,7 +43,7 @@ use datafusion_expr::{ use datafusion_physical_expr_common::aggregate::tdigest::{ TDigest, TryIntoF64, DEFAULT_MAX_SIZE, }; -use datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr; +use datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema; make_udaf_expr_and_func!( ApproxPercentileCont, @@ -135,7 +136,9 @@ impl ApproxPercentileCont { fn get_lit_value(expr: &Expr) -> datafusion_common::Result { let empty_schema = Arc::new(Schema::empty()); let empty_batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); - let expr = limited_convert_logical_expr_to_physical_expr(expr, &empty_schema)?; + let dfschema = DFSchema::empty(); + let expr = + limited_convert_logical_expr_to_physical_expr_with_dfschema(expr, &dfschema)?; let result = expr.evaluate(&empty_batch)?; match result { ColumnarValue::Array(_) => Err(DataFusionError::Internal(format!( diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 9ad453d7a4b22..777a242aa27e3 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -17,19 +17,25 @@ //! 
`ARRAY_AGG` aggregate implementation: [`ArrayAgg`] -use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::array::{new_empty_array, Array, ArrayRef, AsArray, StructArray}; use arrow::datatypes::DataType; -use arrow_schema::Field; +use arrow_schema::{Field, Fields}; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::array_into_list_array_nullable; -use datafusion_common::ScalarValue; +use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx}; +use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{internal_err, Result}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::AggregateUDFImpl; use datafusion_expr::{Accumulator, Signature, Volatility}; -use std::collections::HashSet; +use datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; +use datafusion_physical_expr_common::aggregate::utils::ordering_fields; +use datafusion_physical_expr_common::sort_expr::{ + limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, + PhysicalSortExpr, +}; +use std::collections::{HashSet, VecDeque}; use std::sync::Arc; make_udaf_expr_and_func!( @@ -91,11 +97,24 @@ impl AggregateUDFImpl for ArrayAgg { )]); } - Ok(vec![Field::new_list( + let mut fields = vec![Field::new_list( format_state_name(args.name, "array_agg"), Field::new("item", args.input_type.clone(), true), true, - )]) + )]; + + if args.ordering_fields.is_empty() { + return Ok(fields); + } + + let orderings = args.ordering_fields.to_vec(); + fields.push(Field::new_list( + format_state_name(args.name, "array_agg_orderings"), + Field::new("item", DataType::Struct(Fields::from(orderings)), true), + false, + )); + + Ok(fields) } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { @@ -105,7 +124,31 @@ impl AggregateUDFImpl for ArrayAgg { )?)); } - Ok(Box::new(ArrayAggAccumulator::try_new(acc_args.input_type)?)) + if acc_args.sort_exprs.is_empty() { + return Ok(Box::new(ArrayAggAccumulator::try_new(acc_args.input_type)?)); + } + + let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( + acc_args.sort_exprs, + acc_args.dfschema, + )?; + + let ordering_dtypes = ordering_req + .iter() + .map(|e| e.expr.data_type(acc_args.schema)) + .collect::>>()?; + + OrderSensitiveArrayAggAccumulator::try_new( + acc_args.input_type, + &ordering_dtypes, + ordering_req, + acc_args.is_reversed, + ) + .map(|acc| Box::new(acc) as _) + } + + fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF { + datafusion_expr::ReversedUDAF::Reversed(array_agg_udaf()) } } @@ -259,3 +302,367 @@ impl Accumulator for DistinctArrayAggAccumulator { - std::mem::size_of_val(&self.datatype) } } + +/// Accumulator for a `ARRAY_AGG(... ORDER BY ..., ...)` aggregation. In a multi +/// partition setting, partial aggregations are computed for every partition, +/// and then their results are merged. +#[derive(Debug)] +pub(crate) struct OrderSensitiveArrayAggAccumulator { + /// Stores entries in the `ARRAY_AGG` result. + values: Vec, + /// Stores values of ordering requirement expressions corresponding to each + /// entry in `values`. This information is used when merging results from + /// different partitions. For detailed information how merging is done, see + /// [`merge_ordered_arrays`]. + ordering_values: Vec>, + /// Stores datatypes of expressions inside values and ordering requirement + /// expressions. 
+ datatypes: Vec, + /// Stores the ordering requirement of the `Accumulator`. + ordering_req: LexOrdering, + /// Whether the aggregation is running in reverse. + reverse: bool, +} + +impl OrderSensitiveArrayAggAccumulator { + /// Create a new order-sensitive ARRAY_AGG accumulator based on the given + /// item data type. + pub fn try_new( + datatype: &DataType, + ordering_dtypes: &[DataType], + ordering_req: LexOrdering, + reverse: bool, + ) -> Result { + let mut datatypes = vec![datatype.clone()]; + datatypes.extend(ordering_dtypes.iter().cloned()); + Ok(Self { + values: vec![], + ordering_values: vec![], + datatypes, + ordering_req, + reverse, + }) + } +} + +impl Accumulator for OrderSensitiveArrayAggAccumulator { + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if values.is_empty() { + return Ok(()); + } + + let n_row = values[0].len(); + for index in 0..n_row { + let row = get_row_at_idx(values, index)?; + self.values.push(row[0].clone()); + self.ordering_values.push(row[1..].to_vec()); + } + + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + if states.is_empty() { + return Ok(()); + } + + // First entry in the state is the aggregation result. Second entry + // stores values received for ordering requirement columns for each + // aggregation value inside `ARRAY_AGG` list. For each `StructArray` + // inside `ARRAY_AGG` list, we will receive an `Array` that stores values + // received from its ordering requirement expression. (This information + // is necessary for during merging). + let [array_agg_values, agg_orderings, ..] = &states else { + return exec_err!("State should have two elements"); + }; + let Some(agg_orderings) = agg_orderings.as_list_opt::() else { + return exec_err!("Expects to receive a list array"); + }; + + // Stores ARRAY_AGG results coming from each partition + let mut partition_values = vec![]; + // Stores ordering requirement expression results coming from each partition + let mut partition_ordering_values = vec![]; + + // Existing values should be merged also. + partition_values.push(self.values.clone().into()); + partition_ordering_values.push(self.ordering_values.clone().into()); + + // Convert array to Scalars to sort them easily. Convert back to array at evaluation. 
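For cross-partition merging, each partial accumulator exports exactly this two-part state (the value list plus the ordering struct list), and a downstream accumulator folds it in through `merge_batch`. A generic sketch of that hand-off, assuming two already-populated accumulators (the function and parameter names are illustrative):

```rust
use arrow::array::ArrayRef;
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::Accumulator;

/// Rough sketch: fold one partition's partial ARRAY_AGG state into another
/// accumulator, roughly as the final aggregation stage does.
fn merge_partial_into_final(
    partial: &mut dyn Accumulator,
    final_acc: &mut dyn Accumulator,
) -> Result<ScalarValue> {
    // `state()` yields the two-part layout described above:
    // [values list, ordering struct list].
    let partial_state: Vec<ArrayRef> = partial
        .state()?
        .iter()
        .map(|sv| sv.to_array())
        .collect::<Result<_>>()?;
    final_acc.merge_batch(&partial_state)?;
    // A List scalar whose entries respect the ORDER BY across both inputs.
    final_acc.evaluate()
}
```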
+ let array_agg_res = ScalarValue::convert_array_to_scalar_vec(array_agg_values)?; + for v in array_agg_res.into_iter() { + partition_values.push(v.into()); + } + + let orderings = ScalarValue::convert_array_to_scalar_vec(agg_orderings)?; + + for partition_ordering_rows in orderings.into_iter() { + // Extract value from struct to ordering_rows for each group/partition + let ordering_value = partition_ordering_rows.into_iter().map(|ordering_row| { + if let ScalarValue::Struct(s) = ordering_row { + let mut ordering_columns_per_row = vec![]; + + for column in s.columns() { + let sv = ScalarValue::try_from_array(column, 0)?; + ordering_columns_per_row.push(sv); + } + + Ok(ordering_columns_per_row) + } else { + exec_err!( + "Expects to receive ScalarValue::Struct(Arc) but got:{:?}", + ordering_row.data_type() + ) + } + }).collect::>>()?; + + partition_ordering_values.push(ordering_value); + } + + let sort_options = self + .ordering_req + .iter() + .map(|sort_expr| sort_expr.options) + .collect::>(); + + (self.values, self.ordering_values) = merge_ordered_arrays( + &mut partition_values, + &mut partition_ordering_values, + &sort_options, + )?; + + Ok(()) + } + + fn state(&mut self) -> Result> { + let mut result = vec![self.evaluate()?]; + result.push(self.evaluate_orderings()?); + + Ok(result) + } + + fn evaluate(&mut self) -> Result { + if self.values.is_empty() { + return Ok(ScalarValue::new_null_list( + self.datatypes[0].clone(), + true, + 1, + )); + } + + let values = self.values.clone(); + let array = if self.reverse { + ScalarValue::new_list_from_iter( + values.into_iter().rev(), + &self.datatypes[0], + true, + ) + } else { + ScalarValue::new_list_from_iter(values.into_iter(), &self.datatypes[0], true) + }; + Ok(ScalarValue::List(array)) + } + + fn size(&self) -> usize { + let mut total = std::mem::size_of_val(self) + + ScalarValue::size_of_vec(&self.values) + - std::mem::size_of_val(&self.values); + + // Add size of the `self.ordering_values` + total += + std::mem::size_of::>() * self.ordering_values.capacity(); + for row in &self.ordering_values { + total += ScalarValue::size_of_vec(row) - std::mem::size_of_val(row); + } + + // Add size of the `self.datatypes` + total += std::mem::size_of::() * self.datatypes.capacity(); + for dtype in &self.datatypes { + total += dtype.size() - std::mem::size_of_val(dtype); + } + + // Add size of the `self.ordering_req` + total += std::mem::size_of::() * self.ordering_req.capacity(); + // TODO: Calculate size of each `PhysicalSortExpr` more accurately. + total + } +} + +impl OrderSensitiveArrayAggAccumulator { + fn evaluate_orderings(&self) -> Result { + let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); + let num_columns = fields.len(); + let struct_field = Fields::from(fields.clone()); + + let mut column_wise_ordering_values = vec![]; + for i in 0..num_columns { + let column_values = self + .ordering_values + .iter() + .map(|x| x[i].clone()) + .collect::>(); + let array = if column_values.is_empty() { + new_empty_array(fields[i].data_type()) + } else { + ScalarValue::iter_to_array(column_values.into_iter())? 
+ }; + column_wise_ordering_values.push(array); + } + + let ordering_array = StructArray::try_new( + struct_field.clone(), + column_wise_ordering_values, + None, + )?; + Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( + Arc::new(ordering_array), + )))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::collections::VecDeque; + use std::sync::Arc; + + use arrow::array::Int64Array; + use arrow_schema::SortOptions; + + use datafusion_common::utils::get_row_at_idx; + use datafusion_common::{Result, ScalarValue}; + + #[test] + fn test_merge_asc() -> Result<()> { + let lhs_arrays: Vec = vec![ + Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2])), + Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])), + ]; + let n_row = lhs_arrays[0].len(); + let lhs_orderings = (0..n_row) + .map(|idx| get_row_at_idx(&lhs_arrays, idx)) + .collect::>>()?; + + let rhs_arrays: Vec = vec![ + Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2])), + Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])), + ]; + let n_row = rhs_arrays[0].len(); + let rhs_orderings = (0..n_row) + .map(|idx| get_row_at_idx(&rhs_arrays, idx)) + .collect::>>()?; + let sort_options = vec![ + SortOptions { + descending: false, + nulls_first: false, + }, + SortOptions { + descending: false, + nulls_first: false, + }, + ]; + + let lhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])) as ArrayRef; + let lhs_vals = (0..lhs_vals_arr.len()) + .map(|idx| ScalarValue::try_from_array(&lhs_vals_arr, idx)) + .collect::>>()?; + + let rhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])) as ArrayRef; + let rhs_vals = (0..rhs_vals_arr.len()) + .map(|idx| ScalarValue::try_from_array(&rhs_vals_arr, idx)) + .collect::>>()?; + let expected = + Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4])) as ArrayRef; + let expected_ts = vec![ + Arc::new(Int64Array::from(vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2])) as ArrayRef, + Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4])) as ArrayRef, + ]; + + let (merged_vals, merged_ts) = merge_ordered_arrays( + &mut [lhs_vals, rhs_vals], + &mut [lhs_orderings, rhs_orderings], + &sort_options, + )?; + let merged_vals = ScalarValue::iter_to_array(merged_vals.into_iter())?; + let merged_ts = (0..merged_ts[0].len()) + .map(|col_idx| { + ScalarValue::iter_to_array( + (0..merged_ts.len()) + .map(|row_idx| merged_ts[row_idx][col_idx].clone()), + ) + }) + .collect::>>()?; + + assert_eq!(&merged_vals, &expected); + assert_eq!(&merged_ts, &expected_ts); + + Ok(()) + } + + #[test] + fn test_merge_desc() -> Result<()> { + let lhs_arrays: Vec = vec![ + Arc::new(Int64Array::from(vec![2, 1, 1, 0, 0])), + Arc::new(Int64Array::from(vec![4, 3, 2, 1, 0])), + ]; + let n_row = lhs_arrays[0].len(); + let lhs_orderings = (0..n_row) + .map(|idx| get_row_at_idx(&lhs_arrays, idx)) + .collect::>>()?; + + let rhs_arrays: Vec = vec![ + Arc::new(Int64Array::from(vec![2, 1, 1, 0, 0])), + Arc::new(Int64Array::from(vec![4, 3, 2, 1, 0])), + ]; + let n_row = rhs_arrays[0].len(); + let rhs_orderings = (0..n_row) + .map(|idx| get_row_at_idx(&rhs_arrays, idx)) + .collect::>>()?; + let sort_options = vec![ + SortOptions { + descending: true, + nulls_first: false, + }, + SortOptions { + descending: true, + nulls_first: false, + }, + ]; + + // Values (which will be merged) doesn't have to be ordered. 
+ let lhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 1, 2])) as ArrayRef; + let lhs_vals = (0..lhs_vals_arr.len()) + .map(|idx| ScalarValue::try_from_array(&lhs_vals_arr, idx)) + .collect::>>()?; + + let rhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 1, 2])) as ArrayRef; + let rhs_vals = (0..rhs_vals_arr.len()) + .map(|idx| ScalarValue::try_from_array(&rhs_vals_arr, idx)) + .collect::>>()?; + let expected = + Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 1, 1, 2, 2])) as ArrayRef; + let expected_ts = vec![ + Arc::new(Int64Array::from(vec![2, 2, 1, 1, 1, 1, 0, 0, 0, 0])) as ArrayRef, + Arc::new(Int64Array::from(vec![4, 4, 3, 3, 2, 2, 1, 1, 0, 0])) as ArrayRef, + ]; + let (merged_vals, merged_ts) = merge_ordered_arrays( + &mut [lhs_vals, rhs_vals], + &mut [lhs_orderings, rhs_orderings], + &sort_options, + )?; + let merged_vals = ScalarValue::iter_to_array(merged_vals.into_iter())?; + let merged_ts = (0..merged_ts[0].len()) + .map(|col_idx| { + ScalarValue::iter_to_array( + (0..merged_ts.len()) + .map(|row_idx| merged_ts[row_idx][col_idx].clone()), + ) + }) + .collect::>>()?; + + assert_eq!(&merged_vals, &expected); + assert_eq!(&merged_ts, &expected_ts); + Ok(()) + } +} diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index 0e619bacef824..ba11f7e91e070 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -36,7 +36,8 @@ use datafusion_expr::{ }; use datafusion_physical_expr_common::aggregate::utils::get_sort_options; use datafusion_physical_expr_common::sort_expr::{ - limited_convert_logical_sort_exprs_to_physical, LexOrdering, PhysicalSortExpr, + limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, + PhysicalSortExpr, }; create_func!(FirstValue, first_value_udaf); @@ -116,9 +117,9 @@ impl AggregateUDFImpl for FirstValue { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let ordering_req = limited_convert_logical_sort_exprs_to_physical( + let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( acc_args.sort_exprs, - acc_args.schema, + acc_args.dfschema, )?; let ordering_dtypes = ordering_req @@ -415,9 +416,9 @@ impl AggregateUDFImpl for LastValue { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let ordering_req = limited_convert_logical_sort_exprs_to_physical( + let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( acc_args.sort_exprs, - acc_args.schema, + acc_args.dfschema, )?; let ordering_dtypes = ordering_req diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 6719c673c55be..9bbd68c9bdf60 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -36,7 +36,8 @@ use datafusion_expr::{ use datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; use datafusion_physical_expr_common::aggregate::utils::ordering_fields; use datafusion_physical_expr_common::sort_expr::{ - limited_convert_logical_sort_exprs_to_physical, LexOrdering, PhysicalSortExpr, + limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, + PhysicalSortExpr, }; make_udaf_expr_and_func!( @@ -111,9 +112,9 @@ impl AggregateUDFImpl for NthValueAgg { ), }?; - let ordering_req = limited_convert_logical_sort_exprs_to_physical( + let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( 
acc_args.sort_exprs, - acc_args.schema, + acc_args.dfschema, )?; let ordering_dtypes = ordering_req diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index 42cf44f65d8fb..247962dc2ce11 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -273,6 +273,7 @@ mod tests { use arrow::{array::*, datatypes::*}; + use datafusion_common::DFSchema; use datafusion_expr::AggregateUDF; use datafusion_physical_expr_common::aggregate::utils::get_accum_scalar_values_as_arrays; use datafusion_physical_expr_common::expressions::column::col; @@ -324,13 +325,16 @@ mod tests { agg2: Arc, schema: &Schema, ) -> Result { + let dfschema = DFSchema::empty(); let args1 = AccumulatorArgs { data_type: &DataType::Float64, schema, + dfschema: &dfschema, ignore_nulls: false, sort_exprs: &[], name: "a", is_distinct: false, + is_reversed: false, input_type: &DataType::Float64, input_exprs: &[datafusion_expr::col("a")], }; @@ -338,10 +342,12 @@ mod tests { let args2 = AccumulatorArgs { data_type: &DataType::Float64, schema, + dfschema: &dfschema, ignore_nulls: false, sort_exprs: &[], name: "a", is_distinct: false, + is_reversed: false, input_type: &DataType::Float64, input_exprs: &[datafusion_expr::col("a")], }; diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 7a4a3a6cac4bb..05c7e1caed0e8 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -23,7 +23,7 @@ pub mod tdigest; pub mod utils; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion_common::{not_impl_err, Result}; +use datafusion_common::{not_impl_err, DFSchema, Result}; use datafusion_expr::function::StateFieldsArgs; use datafusion_expr::type_coercion::aggregates::check_arg_count; use datafusion_expr::ReversedUDAF; @@ -51,6 +51,10 @@ use datafusion_expr::utils::AggregateOrderSensitivity; /// /// `input_exprs` and `sort_exprs` are used for customizing Accumulator as the arguments in `AccumulatorArgs`, /// if you don't need them it is fine to pass empty slice `&[]`. +/// +/// `is_reversed` is used to indicate whether the aggregation is running in reverse order, +/// it could be used to hint Accumulator to accumulate in the reversed order, +/// you can just set to false if you are not reversing expression #[allow(clippy::too_many_arguments)] pub fn create_aggregate_expr( fun: &AggregateUDF, @@ -62,6 +66,7 @@ pub fn create_aggregate_expr( name: impl Into, ignore_nulls: bool, is_distinct: bool, + is_reversed: bool, ) -> Result> { debug_assert_eq!(sort_exprs.len(), ordering_req.len()); @@ -81,6 +86,61 @@ pub fn create_aggregate_expr( .map(|e| e.expr.data_type(schema)) .collect::>>()?; + let ordering_fields = ordering_fields(ordering_req, &ordering_types); + let name = name.into(); + + Ok(Arc::new(AggregateFunctionExpr { + fun: fun.clone(), + args: input_phy_exprs.to_vec(), + logical_args: input_exprs.to_vec(), + data_type: fun.return_type(&input_exprs_types)?, + name, + schema: schema.clone(), + dfschema: DFSchema::empty(), + sort_exprs: sort_exprs.to_vec(), + ordering_req: ordering_req.to_vec(), + ignore_nulls, + ordering_fields, + is_distinct, + input_type: input_exprs_types[0].clone(), + is_reversed, + })) +} + +#[allow(clippy::too_many_arguments)] +// This is not for external usage, consider creating with `create_aggregate_expr` instead. 
+pub fn create_aggregate_expr_with_dfschema( + fun: &AggregateUDF, + input_phy_exprs: &[Arc], + input_exprs: &[Expr], + sort_exprs: &[Expr], + ordering_req: &[PhysicalSortExpr], + dfschema: &DFSchema, + name: impl Into, + ignore_nulls: bool, + is_distinct: bool, + is_reversed: bool, +) -> Result> { + debug_assert_eq!(sort_exprs.len(), ordering_req.len()); + + let schema: Schema = dfschema.into(); + + let input_exprs_types = input_phy_exprs + .iter() + .map(|arg| arg.data_type(&schema)) + .collect::>>()?; + + check_arg_count( + fun.name(), + &input_exprs_types, + &fun.signature().type_signature, + )?; + + let ordering_types = ordering_req + .iter() + .map(|e| e.expr.data_type(&schema)) + .collect::>>()?; + let ordering_fields = ordering_fields(ordering_req, &ordering_types); Ok(Arc::new(AggregateFunctionExpr { @@ -90,12 +150,14 @@ pub fn create_aggregate_expr( data_type: fun.return_type(&input_exprs_types)?, name: name.into(), schema: schema.clone(), + dfschema: dfschema.clone(), sort_exprs: sort_exprs.to_vec(), ordering_req: ordering_req.to_vec(), ignore_nulls, ordering_fields, is_distinct, input_type: input_exprs_types[0].clone(), + is_reversed, })) } @@ -261,6 +323,7 @@ pub struct AggregateFunctionExpr { data_type: DataType, name: String, schema: Schema, + dfschema: DFSchema, // The logical order by expressions sort_exprs: Vec, // The physical order by expressions @@ -270,6 +333,7 @@ pub struct AggregateFunctionExpr { // fields used for order sensitive aggregation functions ordering_fields: Vec, is_distinct: bool, + is_reversed: bool, input_type: DataType, } @@ -288,6 +352,11 @@ impl AggregateFunctionExpr { pub fn ignore_nulls(&self) -> bool { self.ignore_nulls } + + /// Return if the aggregation is distinct + pub fn is_reversed(&self) -> bool { + self.is_reversed + } } impl AggregateExpr for AggregateFunctionExpr { @@ -320,12 +389,14 @@ impl AggregateExpr for AggregateFunctionExpr { let acc_args = AccumulatorArgs { data_type: &self.data_type, schema: &self.schema, + dfschema: &self.dfschema, ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, input_type: &self.input_type, input_exprs: &self.logical_args, name: &self.name, + is_reversed: self.is_reversed, }; self.fun.accumulator(acc_args) @@ -335,12 +406,14 @@ impl AggregateExpr for AggregateFunctionExpr { let args = AccumulatorArgs { data_type: &self.data_type, schema: &self.schema, + dfschema: &self.dfschema, ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, input_type: &self.input_type, input_exprs: &self.logical_args, name: &self.name, + is_reversed: self.is_reversed, }; let accumulator = self.fun.create_sliding_accumulator(args)?; @@ -405,12 +478,14 @@ impl AggregateExpr for AggregateFunctionExpr { let args = AccumulatorArgs { data_type: &self.data_type, schema: &self.schema, + dfschema: &self.dfschema, ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, input_type: &self.input_type, input_exprs: &self.logical_args, name: &self.name, + is_reversed: self.is_reversed, }; self.fun.groups_accumulator_supported(args) } @@ -419,12 +494,14 @@ impl AggregateExpr for AggregateFunctionExpr { let args = AccumulatorArgs { data_type: &self.data_type, schema: &self.schema, + dfschema: &self.dfschema, ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, input_type: &self.input_type, input_exprs: &self.logical_args, name: &self.name, + is_reversed: self.is_reversed, }; 
self.fun.create_groups_accumulator(args) } @@ -462,16 +539,17 @@ impl AggregateExpr for AggregateFunctionExpr { else { return Ok(None); }; - create_aggregate_expr( + create_aggregate_expr_with_dfschema( &updated_fn, &self.args, &self.logical_args, &self.sort_exprs, &self.ordering_req, - &self.schema, + &self.dfschema, self.name(), self.ignore_nulls, self.is_distinct, + self.is_reversed, ) .map(Some) } @@ -495,18 +573,23 @@ impl AggregateExpr for AggregateFunctionExpr { }) .collect::>(); let mut name = self.name().to_string(); - replace_order_by_clause(&mut name); + // TODO: Generalize order-by clause rewrite + if reverse_udf.name() == "ARRAY_AGG" { + } else { + replace_order_by_clause(&mut name); + } replace_fn_name_clause(&mut name, self.fun.name(), reverse_udf.name()); - let reverse_aggr = create_aggregate_expr( + let reverse_aggr = create_aggregate_expr_with_dfschema( &reverse_udf, &self.args, &self.logical_args, &reverse_sort_exprs, &reverse_ordering_req, - &self.schema, + &self.dfschema, name, self.ignore_nulls, self.is_distinct, + !self.is_reversed, ) .unwrap(); diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 8fb1356a80926..2b506b74216f4 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -22,12 +22,12 @@ use std::hash::{Hash, Hasher}; use std::sync::Arc; use crate::physical_expr::PhysicalExpr; -use crate::utils::limited_convert_logical_expr_to_physical_expr; +use crate::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_err, DFSchema, Result}; use datafusion_expr::{ColumnarValue, Expr}; /// Represents Sort operation for a column in a RecordBatch @@ -275,9 +275,9 @@ pub type LexRequirementRef<'a> = &'a [PhysicalSortRequirement]; /// Converts each [`Expr::Sort`] into a corresponding [`PhysicalSortExpr`]. /// Returns an error if the given logical expression is not a [`Expr::Sort`]. 
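A sketch of driving the renamed conversion with a `DFSchema` built from an Arrow schema (the function name is illustrative; `DFSchema::try_from` and `Expr::sort` are assumed to be available as in this version):

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::{DFSchema, Result};
use datafusion_expr::col;
use datafusion_physical_expr_common::sort_expr::{
    limited_convert_logical_sort_exprs_to_physical_with_dfschema, PhysicalSortExpr,
};

fn sort_expr_conversion_sketch() -> Result<Vec<PhysicalSortExpr>> {
    let schema = Schema::new(vec![Field::new("ts", DataType::Int64, true)]);
    let dfschema = DFSchema::try_from(schema)?;
    // `ts ASC NULLS LAST` expressed as a logical `Expr::Sort`.
    let sort_exprs = vec![col("ts").sort(true, false)];
    limited_convert_logical_sort_exprs_to_physical_with_dfschema(&sort_exprs, &dfschema)
}
```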
-pub fn limited_convert_logical_sort_exprs_to_physical( +pub fn limited_convert_logical_sort_exprs_to_physical_with_dfschema( exprs: &[Expr], - schema: &Schema, + dfschema: &DFSchema, ) -> Result> { // Construct PhysicalSortExpr objects from Expr objects: let mut sort_exprs = vec![]; @@ -286,7 +286,10 @@ pub fn limited_convert_logical_sort_exprs_to_physical( return exec_err!("Expects to receive sort expression"); }; sort_exprs.push(PhysicalSortExpr::new( - limited_convert_logical_expr_to_physical_expr(sort.expr.as_ref(), schema)?, + limited_convert_logical_expr_to_physical_expr_with_dfschema( + sort.expr.as_ref(), + dfschema, + )?, SortOptions { descending: !sort.asc, nulls_first: sort.nulls_first, diff --git a/datafusion/physical-expr-common/src/utils.rs b/datafusion/physical-expr-common/src/utils.rs index 44622bd309df8..0978a906a5dc6 100644 --- a/datafusion/physical-expr-common/src/utils.rs +++ b/datafusion/physical-expr-common/src/utils.rs @@ -19,15 +19,15 @@ use std::sync::Arc; use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData}; use arrow::compute::{and_kleene, is_not_null, SlicesIterator}; -use arrow::datatypes::Schema; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_err, DFSchema, Result}; use datafusion_expr::expr::Alias; use datafusion_expr::sort_properties::ExprProperties; use datafusion_expr::Expr; +use crate::expressions::column::Column; use crate::expressions::literal::Literal; -use crate::expressions::{self, CastExpr}; +use crate::expressions::CastExpr; use crate::physical_expr::PhysicalExpr; use crate::sort_expr::PhysicalSortExpr; use crate::tree_node::ExprContext; @@ -110,19 +110,22 @@ pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec`. /// If conversion is not supported yet, returns Error. -pub fn limited_convert_logical_expr_to_physical_expr( +pub fn limited_convert_logical_expr_to_physical_expr_with_dfschema( expr: &Expr, - schema: &Schema, + dfschema: &DFSchema, ) -> Result> { match expr { - Expr::Alias(Alias { expr, .. }) => { - Ok(limited_convert_logical_expr_to_physical_expr(expr, schema)?) + Expr::Alias(Alias { expr, .. }) => Ok( + limited_convert_logical_expr_to_physical_expr_with_dfschema(expr, dfschema)?, + ), + Expr::Column(col) => { + let idx = dfschema.index_of_column(col)?; + Ok(Arc::new(Column::new(&col.name, idx))) } - Expr::Column(col) => expressions::column::col(&col.name, schema), Expr::Cast(cast_expr) => Ok(Arc::new(CastExpr::new( - limited_convert_logical_expr_to_physical_expr( + limited_convert_logical_expr_to_physical_expr_with_dfschema( cast_expr.expr.as_ref(), - schema, + dfschema, )?, cast_expr.data_type.clone(), None, diff --git a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs deleted file mode 100644 index 992c06f5bf628..0000000000000 --- a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs +++ /dev/null @@ -1,520 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Defines physical expressions which specify ordering requirement -//! that can evaluated at runtime during query execution - -use std::any::Any; -use std::collections::VecDeque; -use std::fmt::Debug; -use std::sync::Arc; - -use crate::aggregate::utils::{down_cast_any_ref, ordering_fields}; -use crate::expressions::format_state_name; -use crate::{ - reverse_order_bys, AggregateExpr, LexOrdering, PhysicalExpr, PhysicalSortExpr, -}; - -use arrow::datatypes::{DataType, Field}; -use arrow_array::cast::AsArray; -use arrow_array::{new_empty_array, Array, ArrayRef, StructArray}; -use arrow_schema::Fields; -use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx}; -use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::utils::AggregateOrderSensitivity; -use datafusion_expr::Accumulator; -use datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; - -/// Expression for a `ARRAY_AGG(... ORDER BY ..., ...)` aggregation. In a multi -/// partition setting, partial aggregations are computed for every partition, -/// and then their results are merged. -#[derive(Debug)] -pub struct OrderSensitiveArrayAgg { - /// Column name - name: String, - /// The `DataType` for the input expression - input_data_type: DataType, - /// The input expression - expr: Arc, - /// Ordering data types - order_by_data_types: Vec, - /// Ordering requirement - ordering_req: LexOrdering, - /// Whether the aggregation is running in reverse - reverse: bool, -} - -impl OrderSensitiveArrayAgg { - /// Create a new `OrderSensitiveArrayAgg` aggregate function - pub fn new( - expr: Arc, - name: impl Into, - input_data_type: DataType, - order_by_data_types: Vec, - ordering_req: LexOrdering, - ) -> Self { - Self { - name: name.into(), - input_data_type, - expr, - order_by_data_types, - ordering_req, - reverse: false, - } - } -} - -impl AggregateExpr for OrderSensitiveArrayAgg { - fn as_any(&self) -> &dyn Any { - self - } - - fn field(&self) -> Result { - Ok(Field::new_list( - &self.name, - // This should be the same as return type of AggregateFunction::OrderSensitiveArrayAgg - Field::new("item", self.input_data_type.clone(), true), - true, - )) - } - - fn create_accumulator(&self) -> Result> { - OrderSensitiveArrayAggAccumulator::try_new( - &self.input_data_type, - &self.order_by_data_types, - self.ordering_req.clone(), - self.reverse, - ) - .map(|acc| Box::new(acc) as _) - } - - fn state_fields(&self) -> Result> { - let mut fields = vec![Field::new_list( - format_state_name(&self.name, "array_agg"), - Field::new("item", self.input_data_type.clone(), true), - true, // This should be the same as field() - )]; - let orderings = ordering_fields(&self.ordering_req, &self.order_by_data_types); - fields.push(Field::new_list( - format_state_name(&self.name, "array_agg_orderings"), - Field::new("item", DataType::Struct(Fields::from(orderings)), true), - false, - )); - Ok(fields) - } - - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] - } - - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - 
(!self.ordering_req.is_empty()).then_some(&self.ordering_req) - } - - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - AggregateOrderSensitivity::HardRequirement - } - - fn name(&self) -> &str { - &self.name - } - - fn reverse_expr(&self) -> Option> { - Some(Arc::new(Self { - name: self.name.to_string(), - input_data_type: self.input_data_type.clone(), - expr: Arc::clone(&self.expr), - order_by_data_types: self.order_by_data_types.clone(), - // Reverse requirement: - ordering_req: reverse_order_bys(&self.ordering_req), - reverse: !self.reverse, - })) - } -} - -impl PartialEq for OrderSensitiveArrayAgg { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.input_data_type == x.input_data_type - && self.order_by_data_types == x.order_by_data_types - && self.expr.eq(&x.expr) - }) - .unwrap_or(false) - } -} - -#[derive(Debug)] -pub(crate) struct OrderSensitiveArrayAggAccumulator { - /// Stores entries in the `ARRAY_AGG` result. - values: Vec, - /// Stores values of ordering requirement expressions corresponding to each - /// entry in `values`. This information is used when merging results from - /// different partitions. For detailed information how merging is done, see - /// [`merge_ordered_arrays`]. - ordering_values: Vec>, - /// Stores datatypes of expressions inside values and ordering requirement - /// expressions. - datatypes: Vec, - /// Stores the ordering requirement of the `Accumulator`. - ordering_req: LexOrdering, - /// Whether the aggregation is running in reverse. - reverse: bool, -} - -impl OrderSensitiveArrayAggAccumulator { - /// Create a new order-sensitive ARRAY_AGG accumulator based on the given - /// item data type. - pub fn try_new( - datatype: &DataType, - ordering_dtypes: &[DataType], - ordering_req: LexOrdering, - reverse: bool, - ) -> Result { - let mut datatypes = vec![datatype.clone()]; - datatypes.extend(ordering_dtypes.iter().cloned()); - Ok(Self { - values: vec![], - ordering_values: vec![], - datatypes, - ordering_req, - reverse, - }) - } -} - -impl Accumulator for OrderSensitiveArrayAggAccumulator { - fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - if values.is_empty() { - return Ok(()); - } - - let n_row = values[0].len(); - for index in 0..n_row { - let row = get_row_at_idx(values, index)?; - self.values.push(row[0].clone()); - self.ordering_values.push(row[1..].to_vec()); - } - - Ok(()) - } - - fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - if states.is_empty() { - return Ok(()); - } - - // First entry in the state is the aggregation result. Second entry - // stores values received for ordering requirement columns for each - // aggregation value inside `ARRAY_AGG` list. For each `StructArray` - // inside `ARRAY_AGG` list, we will receive an `Array` that stores values - // received from its ordering requirement expression. (This information - // is necessary for during merging). - let [array_agg_values, agg_orderings, ..] = &states else { - return exec_err!("State should have two elements"); - }; - let Some(agg_orderings) = agg_orderings.as_list_opt::() else { - return exec_err!("Expects to receive a list array"); - }; - - // Stores ARRAY_AGG results coming from each partition - let mut partition_values = vec![]; - // Stores ordering requirement expression results coming from each partition - let mut partition_ordering_values = vec![]; - - // Existing values should be merged also. 
- partition_values.push(self.values.clone().into()); - partition_ordering_values.push(self.ordering_values.clone().into()); - - // Convert array to Scalars to sort them easily. Convert back to array at evaluation. - let array_agg_res = ScalarValue::convert_array_to_scalar_vec(array_agg_values)?; - for v in array_agg_res.into_iter() { - partition_values.push(v.into()); - } - - let orderings = ScalarValue::convert_array_to_scalar_vec(agg_orderings)?; - - for partition_ordering_rows in orderings.into_iter() { - // Extract value from struct to ordering_rows for each group/partition - let ordering_value = partition_ordering_rows.into_iter().map(|ordering_row| { - if let ScalarValue::Struct(s) = ordering_row { - let mut ordering_columns_per_row = vec![]; - - for column in s.columns() { - let sv = ScalarValue::try_from_array(column, 0)?; - ordering_columns_per_row.push(sv); - } - - Ok(ordering_columns_per_row) - } else { - exec_err!( - "Expects to receive ScalarValue::Struct(Arc) but got:{:?}", - ordering_row.data_type() - ) - } - }).collect::>>()?; - - partition_ordering_values.push(ordering_value); - } - - let sort_options = self - .ordering_req - .iter() - .map(|sort_expr| sort_expr.options) - .collect::>(); - - (self.values, self.ordering_values) = merge_ordered_arrays( - &mut partition_values, - &mut partition_ordering_values, - &sort_options, - )?; - - Ok(()) - } - - fn state(&mut self) -> Result> { - let mut result = vec![self.evaluate()?]; - result.push(self.evaluate_orderings()?); - Ok(result) - } - - fn evaluate(&mut self) -> Result { - if self.values.is_empty() { - return Ok(ScalarValue::new_null_list( - self.datatypes[0].clone(), - true, - 1, - )); - } - - let values = self.values.clone(); - let array = if self.reverse { - ScalarValue::new_list_from_iter( - values.into_iter().rev(), - &self.datatypes[0], - true, - ) - } else { - ScalarValue::new_list_from_iter(values.into_iter(), &self.datatypes[0], true) - }; - Ok(ScalarValue::List(array)) - } - - fn size(&self) -> usize { - let mut total = std::mem::size_of_val(self) - + ScalarValue::size_of_vec(&self.values) - - std::mem::size_of_val(&self.values); - - // Add size of the `self.ordering_values` - total += - std::mem::size_of::>() * self.ordering_values.capacity(); - for row in &self.ordering_values { - total += ScalarValue::size_of_vec(row) - std::mem::size_of_val(row); - } - - // Add size of the `self.datatypes` - total += std::mem::size_of::() * self.datatypes.capacity(); - for dtype in &self.datatypes { - total += dtype.size() - std::mem::size_of_val(dtype); - } - - // Add size of the `self.ordering_req` - total += std::mem::size_of::() * self.ordering_req.capacity(); - // TODO: Calculate size of each `PhysicalSortExpr` more accurately. - total - } -} - -impl OrderSensitiveArrayAggAccumulator { - fn evaluate_orderings(&self) -> Result { - let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); - let num_columns = fields.len(); - let struct_field = Fields::from(fields.clone()); - - let mut column_wise_ordering_values = vec![]; - for i in 0..num_columns { - let column_values = self - .ordering_values - .iter() - .map(|x| x[i].clone()) - .collect::>(); - let array = if column_values.is_empty() { - new_empty_array(fields[i].data_type()) - } else { - ScalarValue::iter_to_array(column_values.into_iter())? 
- }; - column_wise_ordering_values.push(array); - } - - let ordering_array = StructArray::try_new( - struct_field.clone(), - column_wise_ordering_values, - None, - )?; - Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( - Arc::new(ordering_array), - )))) - } -} - -#[cfg(test)] -mod tests { - use std::collections::VecDeque; - use std::sync::Arc; - - use crate::aggregate::array_agg_ordered::merge_ordered_arrays; - - use arrow_array::{Array, ArrayRef, Int64Array}; - use arrow_schema::SortOptions; - use datafusion_common::utils::get_row_at_idx; - use datafusion_common::{Result, ScalarValue}; - - #[test] - fn test_merge_asc() -> Result<()> { - let lhs_arrays: Vec = vec![ - Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2])), - Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])), - ]; - let n_row = lhs_arrays[0].len(); - let lhs_orderings = (0..n_row) - .map(|idx| get_row_at_idx(&lhs_arrays, idx)) - .collect::>>()?; - - let rhs_arrays: Vec = vec![ - Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2])), - Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])), - ]; - let n_row = rhs_arrays[0].len(); - let rhs_orderings = (0..n_row) - .map(|idx| get_row_at_idx(&rhs_arrays, idx)) - .collect::>>()?; - let sort_options = vec![ - SortOptions { - descending: false, - nulls_first: false, - }, - SortOptions { - descending: false, - nulls_first: false, - }, - ]; - - let lhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])) as ArrayRef; - let lhs_vals = (0..lhs_vals_arr.len()) - .map(|idx| ScalarValue::try_from_array(&lhs_vals_arr, idx)) - .collect::>>()?; - - let rhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4])) as ArrayRef; - let rhs_vals = (0..rhs_vals_arr.len()) - .map(|idx| ScalarValue::try_from_array(&rhs_vals_arr, idx)) - .collect::>>()?; - let expected = - Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4])) as ArrayRef; - let expected_ts = vec![ - Arc::new(Int64Array::from(vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2])) as ArrayRef, - Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4])) as ArrayRef, - ]; - - let (merged_vals, merged_ts) = merge_ordered_arrays( - &mut [lhs_vals, rhs_vals], - &mut [lhs_orderings, rhs_orderings], - &sort_options, - )?; - let merged_vals = ScalarValue::iter_to_array(merged_vals.into_iter())?; - let merged_ts = (0..merged_ts[0].len()) - .map(|col_idx| { - ScalarValue::iter_to_array( - (0..merged_ts.len()) - .map(|row_idx| merged_ts[row_idx][col_idx].clone()), - ) - }) - .collect::>>()?; - - assert_eq!(&merged_vals, &expected); - assert_eq!(&merged_ts, &expected_ts); - - Ok(()) - } - - #[test] - fn test_merge_desc() -> Result<()> { - let lhs_arrays: Vec = vec![ - Arc::new(Int64Array::from(vec![2, 1, 1, 0, 0])), - Arc::new(Int64Array::from(vec![4, 3, 2, 1, 0])), - ]; - let n_row = lhs_arrays[0].len(); - let lhs_orderings = (0..n_row) - .map(|idx| get_row_at_idx(&lhs_arrays, idx)) - .collect::>>()?; - - let rhs_arrays: Vec = vec![ - Arc::new(Int64Array::from(vec![2, 1, 1, 0, 0])), - Arc::new(Int64Array::from(vec![4, 3, 2, 1, 0])), - ]; - let n_row = rhs_arrays[0].len(); - let rhs_orderings = (0..n_row) - .map(|idx| get_row_at_idx(&rhs_arrays, idx)) - .collect::>>()?; - let sort_options = vec![ - SortOptions { - descending: true, - nulls_first: false, - }, - SortOptions { - descending: true, - nulls_first: false, - }, - ]; - - // Values (which will be merged) doesn't have to be ordered. 
- let lhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 1, 2])) as ArrayRef; - let lhs_vals = (0..lhs_vals_arr.len()) - .map(|idx| ScalarValue::try_from_array(&lhs_vals_arr, idx)) - .collect::>>()?; - - let rhs_vals_arr = Arc::new(Int64Array::from(vec![0, 1, 2, 1, 2])) as ArrayRef; - let rhs_vals = (0..rhs_vals_arr.len()) - .map(|idx| ScalarValue::try_from_array(&rhs_vals_arr, idx)) - .collect::>>()?; - let expected = - Arc::new(Int64Array::from(vec![0, 0, 1, 1, 2, 2, 1, 1, 2, 2])) as ArrayRef; - let expected_ts = vec![ - Arc::new(Int64Array::from(vec![2, 2, 1, 1, 1, 1, 0, 0, 0, 0])) as ArrayRef, - Arc::new(Int64Array::from(vec![4, 4, 3, 3, 2, 2, 1, 1, 0, 0])) as ArrayRef, - ]; - let (merged_vals, merged_ts) = merge_ordered_arrays( - &mut [lhs_vals, rhs_vals], - &mut [lhs_orderings, rhs_orderings], - &sort_options, - )?; - let merged_vals = ScalarValue::iter_to_array(merged_vals.into_iter())?; - let merged_ts = (0..merged_ts[0].len()) - .map(|col_idx| { - ScalarValue::iter_to_array( - (0..merged_ts.len()) - .map(|row_idx| merged_ts[row_idx][col_idx].clone()), - ) - }) - .collect::>>()?; - - assert_eq!(&merged_vals, &expected); - assert_eq!(&merged_ts, &expected_ts); - Ok(()) - } -} diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index 9c270561f37d2..27c1533d05525 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -42,7 +42,7 @@ pub fn create_aggregate_expr( fun: &AggregateFunction, distinct: bool, input_phy_exprs: &[Arc], - ordering_req: &[PhysicalSortExpr], + _ordering_req: &[PhysicalSortExpr], input_schema: &Schema, name: impl Into, _ignore_nulls: bool, @@ -54,29 +54,9 @@ pub fn create_aggregate_expr( .map(|e| e.data_type(input_schema)) .collect::>>()?; let data_type = input_phy_types[0].clone(); - let ordering_types = ordering_req - .iter() - .map(|e| e.expr.data_type(input_schema)) - .collect::>>()?; let input_phy_exprs = input_phy_exprs.to_vec(); Ok(match (fun, distinct) { - (AggregateFunction::ArrayAgg, _) => { - let expr = Arc::clone(&input_phy_exprs[0]); - - if ordering_req.is_empty() { - return internal_err!( - "ArrayAgg without ordering should be handled as UDAF" - ); - } else { - Arc::new(expressions::OrderSensitiveArrayAgg::new( - expr, - name, - data_type, - ordering_types, - ordering_req.to_vec(), - )) - } - } + (AggregateFunction::ArrayAgg, _) => return internal_err!("not reachable"), (AggregateFunction::Min, _) => Arc::new(expressions::Min::new( Arc::clone(&input_phy_exprs[0]), name, diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs index 749cf2be7297c..264c485130505 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -15,9 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-pub use datafusion_physical_expr_common::aggregate::AggregateExpr; - -pub(crate) mod array_agg_ordered; #[macro_use] pub(crate) mod min_max; pub(crate) mod groups_accumulator; @@ -31,3 +28,5 @@ pub mod utils { get_sort_options, ordering_fields, DecimalAverager, Hashable, }; } + +pub use datafusion_physical_expr_common::aggregate::AggregateExpr; diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 5a2bcb63b18e6..7cbe4e7968445 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -34,7 +34,6 @@ mod unknown_column; pub mod helpers { pub use crate::aggregate::min_max::{max, min}; } -pub use crate::aggregate::array_agg_ordered::OrderSensitiveArrayAgg; pub use crate::aggregate::build_in::create_aggregate_expr; pub use crate::aggregate::min_max::{Max, MaxAccumulator, Min, MinAccumulator}; pub use crate::aggregate::stats::StatsType; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4146dda7641d4..e7cd5cb2725be 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1194,22 +1194,25 @@ mod tests { use arrow::datatypes::DataType; use arrow_array::{Float32Array, Int32Array}; use datafusion_common::{ - assert_batches_eq, assert_batches_sorted_eq, internal_err, DataFusionError, - ScalarValue, + assert_batches_eq, assert_batches_sorted_eq, internal_err, DFSchema, DFSchemaRef, + DataFusionError, ScalarValue, }; use datafusion_execution::config::SessionConfig; use datafusion_execution::memory_pool::FairSpillPool; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::expr::Sort; + use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::first_last::{FirstValue, LastValue}; use datafusion_functions_aggregate::median::median_udaf; - use datafusion_physical_expr::expressions::{lit, OrderSensitiveArrayAgg}; + use datafusion_physical_expr::expressions::lit; use datafusion_physical_expr::PhysicalSortExpr; use crate::common::collect; - use datafusion_physical_expr_common::aggregate::create_aggregate_expr; + use datafusion_physical_expr_common::aggregate::{ + create_aggregate_expr, create_aggregate_expr_with_dfschema, + }; use datafusion_physical_expr_common::expressions::Literal; use futures::{FutureExt, Stream}; @@ -1258,19 +1261,22 @@ mod tests { } /// Generates some mock data for aggregate tests. - fn some_data_v2() -> (Arc, Vec) { + fn some_data_v2() -> (Arc, DFSchemaRef, Vec) { // Define a schema: let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::UInt32, false), Field::new("b", DataType::Float64, false), ])); + let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); + // Generate data so that first and last value results are at 2nd and // 3rd partitions. With this construction, we guarantee we don't receive // the expected result by accident, but merging actually works properly; // i.e. it doesn't depend on the data insertion order. 
( Arc::clone(&schema), + Arc::new(df_schema), vec![ RecordBatch::try_new( Arc::clone(&schema), @@ -1355,6 +1361,7 @@ mod tests { "COUNT(1)", false, false, + false, )?]; let task_ctx = if spill { @@ -1504,6 +1511,7 @@ mod tests { "AVG(b)", false, false, + false, )?]; let task_ctx = if spill { @@ -1808,6 +1816,7 @@ mod tests { "MEDIAN(a)", false, false, + false, ) } @@ -1844,6 +1853,7 @@ mod tests { "AVG(b)", false, false, + false, )?]; for (version, groups, aggregates) in [ @@ -1908,6 +1918,7 @@ mod tests { "AVG(a)", false, false, + false, )?]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); @@ -1952,6 +1963,7 @@ mod tests { "AVG(b)", false, false, + false, )?]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); @@ -1996,12 +2008,11 @@ mod tests { // FIRST_VALUE(b ORDER BY b ) fn test_first_value_agg_expr( schema: &Schema, + dfschema: &DFSchema, sort_options: SortOptions, ) -> Result> { let sort_exprs = vec![datafusion_expr::Expr::Sort(Sort { - expr: Box::new(datafusion_expr::Expr::Column( - datafusion_common::Column::new(Some("table1"), "b"), - )), + expr: Box::new(datafusion_expr::col("b")), asc: !sort_options.descending, nulls_first: sort_options.nulls_first, })]; @@ -2012,28 +2023,28 @@ mod tests { let args = vec![col("b", schema)?]; let logical_args = vec![datafusion_expr::col("b")]; let func = datafusion_expr::AggregateUDF::new_from_impl(FirstValue::new()); - datafusion_physical_expr_common::aggregate::create_aggregate_expr( + datafusion_physical_expr_common::aggregate::create_aggregate_expr_with_dfschema( &func, &args, &logical_args, &sort_exprs, &ordering_req, - schema, + dfschema, "FIRST_VALUE(b)", false, false, + false, ) } // LAST_VALUE(b ORDER BY b ) fn test_last_value_agg_expr( schema: &Schema, + dfschema: &DFSchema, sort_options: SortOptions, ) -> Result> { let sort_exprs = vec![datafusion_expr::Expr::Sort(Sort { - expr: Box::new(datafusion_expr::Expr::Column( - datafusion_common::Column::new(Some("table1"), "b"), - )), + expr: Box::new(datafusion_expr::col("b")), asc: !sort_options.descending, nulls_first: sort_options.nulls_first, })]; @@ -2044,16 +2055,17 @@ mod tests { let args = vec![col("b", schema)?]; let logical_args = vec![datafusion_expr::col("b")]; let func = datafusion_expr::AggregateUDF::new_from_impl(LastValue::new()); - create_aggregate_expr( + create_aggregate_expr_with_dfschema( &func, &args, &logical_args, &sort_exprs, &ordering_req, - schema, + dfschema, "LAST_VALUE(b)", false, false, + false, ) } @@ -2086,7 +2098,7 @@ mod tests { Arc::new(TaskContext::default()) }; - let (schema, data) = some_data_v2(); + let (schema, df_schema, data) = some_data_v2(); let partition1 = data[0].clone(); let partition2 = data[1].clone(); let partition3 = data[2].clone(); @@ -2100,9 +2112,13 @@ mod tests { nulls_first: false, }; let aggregates: Vec> = if is_first_acc { - vec![test_first_value_agg_expr(&schema, sort_options)?] + vec![test_first_value_agg_expr( + &schema, + &df_schema, + sort_options, + )?] } else { - vec![test_last_value_agg_expr(&schema, sort_options)?] + vec![test_last_value_agg_expr(&schema, &df_schema, sort_options)?] }; let memory_exec = Arc::new(MemoryExec::try_new( @@ -2169,6 +2185,8 @@ mod tests { #[tokio::test] async fn test_get_finest_requirements() -> Result<()> { let test_schema = create_test_schema()?; + let test_df_schema = DFSchema::try_from(Arc::clone(&test_schema)).unwrap(); + // Assume column a and b are aliases // Assume also that a ASC and c DESC describe the same global ordering for the table. 
(Since they are ordering equivalent). let options1 = SortOptions { @@ -2178,7 +2196,7 @@ mod tests { let col_a = &col("a", &test_schema)?; let col_b = &col("b", &test_schema)?; let col_c = &col("c", &test_schema)?; - let mut eq_properties = EquivalenceProperties::new(test_schema); + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&test_schema)); // Columns a and b are equal. eq_properties.add_equal_conditions(col_a, col_b)?; // Aggregate requirements are @@ -2214,6 +2232,46 @@ mod tests { }, ]), ]; + let col_expr_a = Box::new(datafusion_expr::col("a")); + let col_expr_b = Box::new(datafusion_expr::col("b")); + let col_expr_c = Box::new(datafusion_expr::col("c")); + let sort_exprs = vec![ + None, + Some(vec![datafusion_expr::Expr::Sort(Sort::new( + col_expr_a.clone(), + options1.descending, + options1.nulls_first, + ))]), + Some(vec![ + datafusion_expr::Expr::Sort(Sort::new( + col_expr_a.clone(), + options1.descending, + options1.nulls_first, + )), + datafusion_expr::Expr::Sort(Sort::new( + col_expr_b.clone(), + options1.descending, + options1.nulls_first, + )), + datafusion_expr::Expr::Sort(Sort::new( + col_expr_c, + options1.descending, + options1.nulls_first, + )), + ]), + Some(vec![ + datafusion_expr::Expr::Sort(Sort::new( + col_expr_a, + options1.descending, + options1.nulls_first, + )), + datafusion_expr::Expr::Sort(Sort::new( + col_expr_b, + options1.descending, + options1.nulls_first, + )), + ]), + ]; let common_requirement = vec![ PhysicalSortExpr { expr: Arc::clone(col_a), @@ -2226,14 +2284,23 @@ mod tests { ]; let mut aggr_exprs = order_by_exprs .into_iter() - .map(|order_by_expr| { - Arc::new(OrderSensitiveArrayAgg::new( - Arc::clone(col_a), + .zip(sort_exprs.into_iter()) + .map(|(order_by_expr, sort_exprs)| { + let ordering_req = order_by_expr.unwrap_or_default(); + let sort_exprs = sort_exprs.unwrap_or_default(); + create_aggregate_expr_with_dfschema( + &array_agg_udaf(), + &[Arc::clone(col_a)], + &[], + &sort_exprs, + &ordering_req, + &test_df_schema, "array_agg", - DataType::Int32, - vec![], - order_by_expr.unwrap_or_default(), - )) as _ + false, + false, + false, + ) + .unwrap() }) .collect::>(); let group_by = PhysicalGroupBy::new_single(vec![]); @@ -2254,6 +2321,7 @@ mod tests { Field::new("a", DataType::Float32, true), Field::new("b", DataType::Float32, true), ])); + let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); let col_a = col("a", &schema)?; let option_desc = SortOptions { @@ -2263,8 +2331,8 @@ mod tests { let groups = PhysicalGroupBy::new_single(vec![(col_a, "a".to_string())]); let aggregates: Vec> = vec![ - test_first_value_agg_expr(&schema, option_desc)?, - test_last_value_agg_expr(&schema, option_desc)?, + test_first_value_agg_expr(&schema, &df_schema, option_desc)?, + test_last_value_agg_expr(&schema, &df_schema, option_desc)?, ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let aggregate_exec = Arc::new(AggregateExec::try_new( @@ -2330,6 +2398,7 @@ mod tests { "1", false, false, + false, )?]; let input_batches = (0..4) diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 046977da0a37e..c834005bb7c3c 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -92,7 +92,7 @@ pub mod work_table; pub mod udaf { pub use datafusion_physical_expr_common::aggregate::{ - create_aggregate_expr, AggregateFunctionExpr, + create_aggregate_expr, create_aggregate_expr_with_dfschema, AggregateFunctionExpr, }; } diff --git 
a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 5eca7af19d16d..959796489c191 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -157,6 +157,7 @@ pub fn create_window_expr( name, ignore_nulls, false, + false, )?; window_expr_from_aggregate_expr( partition_by, diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 9e17c19ecbc5d..8c9e5bbd0e959 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -507,7 +507,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { // TODO: `order by` is not supported for UDAF yet let sort_exprs = &[]; let ordering_req = &[]; - udaf::create_aggregate_expr(agg_udf.as_ref(), &input_phy_expr, logical_exprs, sort_exprs, ordering_req, &physical_schema, name, agg_node.ignore_nulls, agg_node.distinct) + udaf::create_aggregate_expr(agg_udf.as_ref(), &input_phy_expr, logical_exprs, sort_exprs, ordering_req, &physical_schema, name, agg_node.ignore_nulls, agg_node.distinct, false) } } }).transpose()?.ok_or_else(|| { diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index e9a90fce2663f..140482b9903ca 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -24,8 +24,8 @@ use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, InListExpr, IsNotNullExpr, - IsNullExpr, Literal, Max, Min, NegativeExpr, NotExpr, NthValue, Ntile, - OrderSensitiveArrayAgg, Rank, RankType, RowNumber, TryCastExpr, WindowShift, + IsNullExpr, Literal, Max, Min, NegativeExpr, NotExpr, NthValue, Ntile, Rank, + RankType, RowNumber, TryCastExpr, WindowShift, }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; @@ -260,10 +260,8 @@ struct AggrFn { fn aggr_expr_to_aggr_fn(expr: &dyn AggregateExpr) -> Result { let aggr_expr = expr.as_any(); - // TODO: remove OrderSensitiveArrayAgg - let inner = if aggr_expr.downcast_ref::().is_some() { - protobuf::AggregateFunction::ArrayAgg - } else if aggr_expr.downcast_ref::().is_some() { + // TODO: remove Min and Max + let inner = if aggr_expr.downcast_ref::().is_some() { protobuf::AggregateFunction::Min } else if aggr_expr.downcast_ref::().is_some() { protobuf::AggregateFunction::Max diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index fba6dfe425996..31ed0837d2f5b 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -301,6 +301,7 @@ fn roundtrip_window() -> Result<()> { "avg(b)", false, false, + false, )?, &[], &[], @@ -324,6 +325,7 @@ fn roundtrip_window() -> Result<()> { "SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING", false, false, + false, )?; let sliding_aggr_window_expr = Arc::new(SlidingAggregateWindowExpr::new( @@ -367,6 +369,7 @@ fn rountrip_aggregate() -> Result<()> { "AVG(b)", false, false, + false, )?], // NTH_VALUE vec![create_aggregate_expr( @@ -379,6 +382,7 @@ fn rountrip_aggregate() -> Result<()> { "NTH_VALUE(b, 1)", false, false, + false, )?], // STRING_AGG 
vec![create_aggregate_expr( @@ -394,6 +398,7 @@ fn rountrip_aggregate() -> Result<()> { "STRING_AGG(name, ',')", false, false, + false, )?], ]; @@ -431,6 +436,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { "AVG(b)", false, false, + false, )?]; let agg = AggregateExec::try_new( @@ -502,6 +508,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { "example_agg", false, false, + false, )?]; roundtrip_test_with_context( @@ -1000,6 +1007,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { "aggregate_udf", false, false, + false, )?; let filter = Arc::new(FilterExec::try_new( @@ -1032,6 +1040,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { "aggregate_udf", true, true, + false, )?; let aggregate = Arc::new(AggregateExec::try_new( From deef834e7adfd859414448ab4da461e2d4eabb9e Mon Sep 17 00:00:00 2001 From: Oleks V Date: Mon, 22 Jul 2024 17:46:23 -0700 Subject: [PATCH 116/357] Minor: Disable flaky antijoin test until perm fix (#11608) --- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 604c1f93e55ea..f1cca66712d79 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -250,6 +250,9 @@ async fn test_anti_join_1k() { } #[tokio::test] +#[ignore] +// flaky test giving 1 rows difference sometimes +// https://github.com/apache/datafusion/issues/11555 async fn test_anti_join_1k_filtered() { // NLJ vs HJ gives wrong result // Tracked in https://github.com/apache/datafusion/issues/11537 From 77311a5896272c7ed252d8cd53d48ec6ea7c0ccf Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Tue, 23 Jul 2024 11:18:00 +0100 Subject: [PATCH 117/357] support Decimal256 type in datafusion-proto (#11606) --- .../proto/datafusion_common.proto | 7 + datafusion/proto-common/src/from_proto/mod.rs | 4 + .../proto-common/src/generated/pbjson.rs | 125 ++++++++++++++++++ .../proto-common/src/generated/prost.rs | 12 +- datafusion/proto-common/src/to_proto/mod.rs | 7 +- .../src/generated/datafusion_proto_common.rs | 12 +- .../tests/cases/roundtrip_logical_plan.rs | 2 + 7 files changed, 164 insertions(+), 5 deletions(-) diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index ca95136dadd96..8e8fd2352c6ca 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -130,6 +130,12 @@ message Decimal{ int32 scale = 4; } +message Decimal256Type{ + reserved 1, 2; + uint32 precision = 3; + int32 scale = 4; +} + message List{ Field field_type = 1; } @@ -335,6 +341,7 @@ message ArrowType{ TimeUnit TIME64 = 22 ; IntervalUnit INTERVAL = 23 ; Decimal DECIMAL = 24 ; + Decimal256Type DECIMAL256 = 36; List LIST = 25; List LARGE_LIST = 26; FixedSizeList FIXED_SIZE_LIST = 27; diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 9191ff185a046..5fe9d937f7c4e 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -260,6 +260,10 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { precision, scale, }) => DataType::Decimal128(*precision as u8, *scale as i8), + arrow_type::ArrowTypeEnum::Decimal256(protobuf::Decimal256Type { + precision, + scale, + }) => DataType::Decimal256(*precision as u8, *scale as i8), 
arrow_type::ArrowTypeEnum::List(list) => { let list_type = list.as_ref().field_type.as_deref().required("field_type")?; diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 4b34660ae2ef5..511072f3cb558 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -175,6 +175,9 @@ impl serde::Serialize for ArrowType { arrow_type::ArrowTypeEnum::Decimal(v) => { struct_ser.serialize_field("DECIMAL", v)?; } + arrow_type::ArrowTypeEnum::Decimal256(v) => { + struct_ser.serialize_field("DECIMAL256", v)?; + } arrow_type::ArrowTypeEnum::List(v) => { struct_ser.serialize_field("LIST", v)?; } @@ -241,6 +244,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "TIME64", "INTERVAL", "DECIMAL", + "DECIMAL256", "LIST", "LARGE_LIST", "LARGELIST", @@ -282,6 +286,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { Time64, Interval, Decimal, + Decimal256, List, LargeList, FixedSizeList, @@ -338,6 +343,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "TIME64" => Ok(GeneratedField::Time64), "INTERVAL" => Ok(GeneratedField::Interval), "DECIMAL" => Ok(GeneratedField::Decimal), + "DECIMAL256" => Ok(GeneratedField::Decimal256), "LIST" => Ok(GeneratedField::List), "LARGELIST" | "LARGE_LIST" => Ok(GeneratedField::LargeList), "FIXEDSIZELIST" | "FIXED_SIZE_LIST" => Ok(GeneratedField::FixedSizeList), @@ -556,6 +562,13 @@ impl<'de> serde::Deserialize<'de> for ArrowType { return Err(serde::de::Error::duplicate_field("DECIMAL")); } arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::Decimal) +; + } + GeneratedField::Decimal256 => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("DECIMAL256")); + } + arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::Decimal256) ; } GeneratedField::List => { @@ -2849,6 +2862,118 @@ impl<'de> serde::Deserialize<'de> for Decimal256 { deserializer.deserialize_struct("datafusion_common.Decimal256", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for Decimal256Type { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.precision != 0 { + len += 1; + } + if self.scale != 0 { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion_common.Decimal256Type", len)?; + if self.precision != 0 { + struct_ser.serialize_field("precision", &self.precision)?; + } + if self.scale != 0 { + struct_ser.serialize_field("scale", &self.scale)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for Decimal256Type { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "precision", + "scale", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Precision, + Scale, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + 
E: serde::de::Error, + { + match value { + "precision" => Ok(GeneratedField::Precision), + "scale" => Ok(GeneratedField::Scale), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = Decimal256Type; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion_common.Decimal256Type") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut precision__ = None; + let mut scale__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::Precision => { + if precision__.is_some() { + return Err(serde::de::Error::duplicate_field("precision")); + } + precision__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::Scale => { + if scale__.is_some() { + return Err(serde::de::Error::duplicate_field("scale")); + } + scale__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + } + } + Ok(Decimal256Type { + precision: precision__.unwrap_or_default(), + scale: scale__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion_common.Decimal256Type", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for DfField { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 9a2770997f15e..62919e218b133 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -140,6 +140,14 @@ pub struct Decimal { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct Decimal256Type { + #[prost(uint32, tag = "3")] + pub precision: u32, + #[prost(int32, tag = "4")] + pub scale: i32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct List { #[prost(message, optional, boxed, tag = "1")] pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, @@ -446,7 +454,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 36, 25, 26, 27, 28, 29, 30, 33" )] pub arrow_type_enum: ::core::option::Option, } @@ -516,6 +524,8 @@ pub mod arrow_type { Interval(i32), #[prost(message, tag = "24")] Decimal(super::Decimal), + #[prost(message, tag = "36")] + Decimal256(super::Decimal256Type), #[prost(message, tag = "25")] List(::prost::alloc::boxed::Box), #[prost(message, tag = "26")] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 9dcb65444a470..c15da2895b7cf 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -191,9 +191,10 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { precision: *precision as u32, scale: *scale as i32, }), - DataType::Decimal256(_, _) => { - return Err(Error::General("Proto serialization error: The Decimal256 data type is not yet supported".to_owned())) - } + 
DataType::Decimal256(precision, scale) => Self::Decimal256(protobuf::Decimal256Type { + precision: *precision as u32, + scale: *scale as i32, + }), DataType::Map(field, sorted) => { Self::Map(Box::new( protobuf::Map { diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 9a2770997f15e..62919e218b133 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -140,6 +140,14 @@ pub struct Decimal { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct Decimal256Type { + #[prost(uint32, tag = "3")] + pub precision: u32, + #[prost(int32, tag = "4")] + pub scale: i32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct List { #[prost(message, optional, boxed, tag = "1")] pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, @@ -446,7 +454,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 36, 25, 26, 27, 28, 29, 30, 33" )] pub arrow_type_enum: ::core::option::Option, } @@ -516,6 +524,8 @@ pub mod arrow_type { Interval(i32), #[prost(message, tag = "24")] Decimal(super::Decimal), + #[prost(message, tag = "36")] + Decimal256(super::Decimal256Type), #[prost(message, tag = "25")] List(::prost::alloc::boxed::Box), #[prost(message, tag = "26")] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 3476d5d042cc8..f6557c7b2d8fc 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -27,6 +27,7 @@ use arrow::array::{ use arrow::datatypes::{ DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode, + DECIMAL256_MAX_PRECISION, }; use prost::Message; @@ -1379,6 +1380,7 @@ fn round_trip_datatype() { DataType::Utf8, DataType::LargeUtf8, DataType::Decimal128(7, 12), + DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0), // Recursive list tests DataType::List(new_arc_field("Level1", DataType::Binary, true)), DataType::List(new_arc_field( From 7afc99f3d2a321d853210af7bd0a4b49e5afa0c4 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Tue, 23 Jul 2024 17:11:48 +0300 Subject: [PATCH 118/357] Chore/fifo tests cleanup (#11616) * Cleanup fifo tests * Resolve merge conflicts --- datafusion/core/src/physical_optimizer/mod.rs | 8 +- datafusion/core/tests/fifo/mod.rs | 174 +++++++++--------- 2 files changed, 90 insertions(+), 92 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index a0c9c36977442..7c508eeef8780 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -33,12 +33,12 @@ pub mod projection_pushdown; pub mod pruning; pub mod replace_with_order_preserving_variants; pub mod sanity_checker; -mod sort_pushdown; +#[cfg(test)] +pub mod test_utils; pub mod topk_aggregation; pub mod update_aggr_exprs; -mod utils; -#[cfg(test)] -pub mod test_utils; +mod sort_pushdown; +mod 
utils; pub use datafusion_physical_optimizer::*; diff --git a/datafusion/core/tests/fifo/mod.rs b/datafusion/core/tests/fifo/mod.rs index 1df97b1636c76..6efbb9b029ded 100644 --- a/datafusion/core/tests/fifo/mod.rs +++ b/datafusion/core/tests/fifo/mod.rs @@ -6,7 +6,7 @@ // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // -//http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an @@ -16,38 +16,37 @@ // under the License. //! This test demonstrates the DataFusion FIFO capabilities. -//! + #[cfg(target_family = "unix")] #[cfg(test)] mod unix_test { - use datafusion_common::instant::Instant; - use std::fs::{File, OpenOptions}; - use std::io::Write; + use std::fs::File; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; - use std::thread; use std::time::Duration; use arrow::array::Array; use arrow::csv::ReaderBuilder; use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::SchemaRef; - use futures::StreamExt; - use nix::sys::stat; - use nix::unistd; - use tempfile::TempDir; - use tokio::task::{spawn_blocking, JoinHandle}; - use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::datasource::TableProvider; use datafusion::{ prelude::{CsvReadOptions, SessionConfig, SessionContext}, test_util::{aggr_test_schema, arrow_test_data}, }; - use datafusion_common::{exec_err, DataFusionError, Result}; + use datafusion_common::instant::Instant; + use datafusion_common::{exec_err, Result}; use datafusion_expr::Expr; + use futures::StreamExt; + use nix::sys::stat; + use nix::unistd; + use tempfile::TempDir; + use tokio::io::AsyncWriteExt; + use tokio::task::{spawn_blocking, JoinHandle}; + /// Makes a TableProvider for a fifo file fn fifo_table( schema: SchemaRef, @@ -71,8 +70,8 @@ mod unix_test { } } - fn write_to_fifo( - mut file: &File, + async fn write_to_fifo( + file: &mut tokio::fs::File, line: &str, ref_time: Instant, broken_pipe_timeout: Duration, @@ -80,11 +79,11 @@ mod unix_test { // We need to handle broken pipe error until the reader is ready. This // is why we use a timeout to limit the wait duration for the reader. // If the error is different than broken pipe, we fail immediately. - while let Err(e) = file.write_all(line.as_bytes()) { + while let Err(e) = file.write_all(line.as_bytes()).await { if e.raw_os_error().unwrap() == 32 { let interval = Instant::now().duration_since(ref_time); if interval < broken_pipe_timeout { - thread::sleep(Duration::from_millis(100)); + tokio::time::sleep(Duration::from_millis(50)).await; continue; } } @@ -93,28 +92,38 @@ mod unix_test { Ok(()) } - fn create_writing_thread( + /// This function creates a writing task for the FIFO file. To verify + /// incremental processing, it waits for a signal to continue writing after + /// a certain number of lines are written. 
+ #[allow(clippy::disallowed_methods)] + fn create_writing_task( file_path: PathBuf, header: String, lines: Vec, - waiting_lock: Arc, - wait_until: usize, + waiting_signal: Arc, + send_before_waiting: usize, ) -> JoinHandle<()> { // Timeout for a long period of BrokenPipe error let broken_pipe_timeout = Duration::from_secs(10); - let sa = file_path.clone(); - // Spawn a new thread to write to the FIFO file - #[allow(clippy::disallowed_methods)] // spawn allowed only in tests - spawn_blocking(move || { - let file = OpenOptions::new().write(true).open(sa).unwrap(); + // Spawn a new task to write to the FIFO file + tokio::spawn(async move { + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .open(file_path) + .await + .unwrap(); // Reference time to use when deciding to fail the test let execution_start = Instant::now(); - write_to_fifo(&file, &header, execution_start, broken_pipe_timeout).unwrap(); + write_to_fifo(&mut file, &header, execution_start, broken_pipe_timeout) + .await + .unwrap(); for (cnt, line) in lines.iter().enumerate() { - while waiting_lock.load(Ordering::SeqCst) && cnt > wait_until { - thread::sleep(Duration::from_millis(50)); + while waiting_signal.load(Ordering::SeqCst) && cnt > send_before_waiting { + tokio::time::sleep(Duration::from_millis(50)).await; } - write_to_fifo(&file, line, execution_start, broken_pipe_timeout).unwrap(); + write_to_fifo(&mut file, line, execution_start, broken_pipe_timeout) + .await + .unwrap(); } drop(file); }) @@ -125,6 +134,8 @@ mod unix_test { const TEST_BATCH_SIZE: usize = 20; // Number of lines written to FIFO const TEST_DATA_SIZE: usize = 20_000; + // Number of lines to write before waiting to verify incremental processing + const SEND_BEFORE_WAITING: usize = 2 * TEST_BATCH_SIZE; // Number of lines what can be joined. Each joinable key produced 20 lines with // aggregate_test_100 dataset. We will use these joinable keys for understanding // incremental execution. @@ -132,7 +143,7 @@ mod unix_test { // This test provides a relatively realistic end-to-end scenario where // we swap join sides to accommodate a FIFO source. - #[tokio::test(flavor = "multi_thread", worker_threads = 8)] + #[tokio::test] async fn unbounded_file_with_swapped_join() -> Result<()> { // Create session context let config = SessionConfig::new() @@ -162,8 +173,8 @@ mod unix_test { .zip(0..TEST_DATA_SIZE) .map(|(a1, a2)| format!("{a1},{a2}\n")) .collect::>(); - // Create writing threads for the left and right FIFO files - let task = create_writing_thread( + // Create writing tasks for the left and right FIFO files + let task = create_writing_task( fifo_path.clone(), "a1,a2\n".to_owned(), lines, @@ -190,7 +201,16 @@ mod unix_test { ) .await?; // Execute the query - let df = ctx.sql("SELECT t1.a2, t2.c1, t2.c4, t2.c5 FROM left as t1 JOIN right as t2 ON t1.a1 = t2.c1").await?; + let df = ctx + .sql( + "SELECT + t1.a2, t2.c1, t2.c4, t2.c5 + FROM + left as t1, right as t2 + WHERE + t1.a1 = t2.c1", + ) + .await?; let mut stream = df.execute_stream().await?; while (stream.next().await).is_some() { waiting.store(false, Ordering::SeqCst); @@ -199,16 +219,9 @@ mod unix_test { Ok(()) } - #[derive(Debug, PartialEq)] - enum JoinOperation { - LeftUnmatched, - RightUnmatched, - Equal, - } - - // This test provides a relatively realistic end-to-end scenario where - // we change the join into a [SymmetricHashJoin] to accommodate two - // unbounded (FIFO) sources. 
+ /// This test provides a relatively realistic end-to-end scenario where + /// we change the join into a `SymmetricHashJoinExec` to accommodate two + /// unbounded (FIFO) sources. #[tokio::test] async fn unbounded_file_with_symmetric_join() -> Result<()> { // Create session context @@ -247,19 +260,18 @@ mod unix_test { let df = ctx .sql( "SELECT - t1.a1, - t1.a2, - t2.a1, - t2.a2 + t1.a1, t1.a2, t2.a1, t2.a2 FROM - left as t1 FULL - JOIN right as t2 ON t1.a2 = t2.a2 - AND t1.a1 > t2.a1 + 4 - AND t1.a1 < t2.a1 + 9", + left as t1 + FULL JOIN + right as t2 + ON + t1.a2 = t2.a2 AND + t1.a1 > t2.a1 + 4 AND + t1.a1 < t2.a1 + 9", ) .await?; let mut stream = df.execute_stream().await?; - let mut operations = vec![]; // Tasks let mut tasks: Vec> = vec![]; @@ -273,54 +285,43 @@ mod unix_test { .map(|(a1, a2)| format!("{a1},{a2}\n")) .collect::>(); - // Create writing threads for the left and right FIFO files - tasks.push(create_writing_thread( + // Create writing tasks for the left and right FIFO files + tasks.push(create_writing_task( left_fifo, "a1,a2\n".to_owned(), lines.clone(), waiting.clone(), - TEST_BATCH_SIZE, + SEND_BEFORE_WAITING, )); - tasks.push(create_writing_thread( + tasks.push(create_writing_task( right_fifo, "a1,a2\n".to_owned(), - lines.clone(), + lines, waiting.clone(), - TEST_BATCH_SIZE, + SEND_BEFORE_WAITING, )); - // Partial. + // Collect output data: + let (mut equal, mut left, mut right) = (0, 0, 0); while let Some(Ok(batch)) = stream.next().await { waiting.store(false, Ordering::SeqCst); let left_unmatched = batch.column(2).null_count(); let right_unmatched = batch.column(0).null_count(); - let op = if left_unmatched == 0 && right_unmatched == 0 { - JoinOperation::Equal - } else if right_unmatched > left_unmatched { - JoinOperation::RightUnmatched + if left_unmatched == 0 && right_unmatched == 0 { + equal += 1; + } else if right_unmatched <= left_unmatched { + left += 1; } else { - JoinOperation::LeftUnmatched + right += 1; }; - operations.push(op); } futures::future::try_join_all(tasks).await.unwrap(); - // The SymmetricHashJoin executor produces FULL join results at every - // pruning, which happens before it reaches the end of input and more - // than once. In this test, we feed partially joinable data to both - // sides in order to ensure that left or right unmatched results are - // generated more than once during the test. - assert!( - operations - .iter() - .filter(|&n| JoinOperation::RightUnmatched.eq(n)) - .count() - > 1 - && operations - .iter() - .filter(|&n| JoinOperation::LeftUnmatched.eq(n)) - .count() - > 1 - ); + // The symmetric hash join algorithm produces FULL join results at + // every pruning, which happens before it reaches the end of input and + // more than once. In this test, we feed partially joinable data to + // both sides in order to ensure that left or right unmatched results + // are generated as expected. + assert!(equal >= 0 && left > 1 && right > 1); Ok(()) } @@ -341,17 +342,14 @@ mod unix_test { (source_fifo_path.clone(), source_fifo_path.display()); // Tasks let mut tasks: Vec> = vec![]; - // TEST_BATCH_SIZE + 1 rows will be provided. However, after processing precisely - // TEST_BATCH_SIZE rows, the program will pause and wait for a batch to be read in another - // thread. This approach ensures that the pipeline remains unbroken. 
- tasks.push(create_writing_thread( + tasks.push(create_writing_task( source_fifo_path_thread, "a1,a2\n".to_owned(), (0..TEST_DATA_SIZE) .map(|_| "a,1\n".to_string()) .collect::>(), waiting, - TEST_BATCH_SIZE, + SEND_BEFORE_WAITING, )); // Create a new temporary FIFO file let sink_fifo_path = create_fifo_file(&tmp_dir, "sink.csv")?; @@ -370,8 +368,8 @@ mod unix_test { let mut reader = ReaderBuilder::new(schema) .with_batch_size(TEST_BATCH_SIZE) + .with_header(true) .build(file) - .map_err(|e| DataFusionError::Internal(e.to_string())) .unwrap(); while let Some(Ok(_)) = reader.next() { From 67c6ee2fb0b8691bc2603d5179c0b51e66e80382 Mon Sep 17 00:00:00 2001 From: waruto Date: Tue, 23 Jul 2024 23:59:18 +0800 Subject: [PATCH 119/357] fix typo in doc of Partitioning (#11612) Co-authored-by: jsonwu --- datafusion/physical-expr/src/partitioning.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-expr/src/partitioning.rs b/datafusion/physical-expr/src/partitioning.rs index 821b2c9fe17a7..6472dd47489ca 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -84,9 +84,9 @@ use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// └──────────┐│┌──────────┘ /// │││ /// │││ -/// RepartitionExec with one input -/// that has 3 partitions, but 3 (async) streams, that internally -/// itself has only 1 output partition pull from the same input stream +/// RepartitionExec with 1 input +/// partition and 3 output partitions 3 (async) streams, that internally +/// pull from the same input stream /// ... /// ``` /// From f80dde06ec2cbe06938e7335facb8e30100ddb9f Mon Sep 17 00:00:00 2001 From: Arttu Date: Tue, 23 Jul 2024 18:36:51 +0200 Subject: [PATCH 120/357] feat: support Map literals in Substrait consumer and producer (#11547) * implement Map literals/nulls conversions in Substrait * fix name handling for lists/maps containing structs * add hashing for map scalars * add a test for creating a map in VALUES * fix clipppy * better test * use MapBuilder in test * fix hash test * remove unnecessary type variation checks from maps --- datafusion/common/src/hash_utils.rs | 102 ++++++++++++- datafusion/common/src/scalar/mod.rs | 2 +- datafusion/sqllogictest/test_files/map.slt | 8 + .../substrait/src/logical_plan/consumer.rs | 143 +++++++++++++++--- .../substrait/src/logical_plan/producer.rs | 76 +++++++++- .../tests/cases/roundtrip_logical_plan.rs | 4 +- 6 files changed, 308 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index c8adae34f6455..010221b0485f9 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -29,8 +29,8 @@ use arrow_buffer::IntervalMonthDayNano; use crate::cast::{ as_boolean_array, as_fixed_size_list_array, as_generic_binary_array, - as_large_list_array, as_list_array, as_primitive_array, as_string_array, - as_struct_array, + as_large_list_array, as_list_array, as_map_array, as_primitive_array, + as_string_array, as_struct_array, }; use crate::error::{Result, _internal_err}; @@ -236,6 +236,40 @@ fn hash_struct_array( Ok(()) } +fn hash_map_array( + array: &MapArray, + random_state: &RandomState, + hashes_buffer: &mut [u64], +) -> Result<()> { + let nulls = array.nulls(); + let offsets = array.offsets(); + + // Create hashes for each entry in each row + let mut values_hashes = vec![0u64; array.entries().len()]; + create_hashes(array.entries().columns(), random_state, &mut 
values_hashes)?; + + // Combine the hashes for entries on each row with each other and previous hash for that row + if let Some(nulls) = nulls { + for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() { + if nulls.is_valid(i) { + let hash = &mut hashes_buffer[i]; + for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] { + *hash = combine_hashes(*hash, *values_hash); + } + } + } + } else { + for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() { + let hash = &mut hashes_buffer[i]; + for values_hash in &values_hashes[start.as_usize()..stop.as_usize()] { + *hash = combine_hashes(*hash, *values_hash); + } + } + } + + Ok(()) +} + fn hash_list_array( array: &GenericListArray, random_state: &RandomState, @@ -400,6 +434,10 @@ pub fn create_hashes<'a>( let array = as_large_list_array(array)?; hash_list_array(array, random_state, hashes_buffer)?; } + DataType::Map(_, _) => { + let array = as_map_array(array)?; + hash_map_array(array, random_state, hashes_buffer)?; + } DataType::FixedSizeList(_,_) => { let array = as_fixed_size_list_array(array)?; hash_fixed_list_array(array, random_state, hashes_buffer)?; @@ -572,6 +610,7 @@ mod tests { Some(vec![Some(3), None, Some(5)]), None, Some(vec![Some(0), Some(1), Some(2)]), + Some(vec![]), ]; let list_array = Arc::new(ListArray::from_iter_primitive::(data)) as ArrayRef; @@ -581,6 +620,7 @@ mod tests { assert_eq!(hashes[0], hashes[5]); assert_eq!(hashes[1], hashes[4]); assert_eq!(hashes[2], hashes[3]); + assert_eq!(hashes[1], hashes[6]); // null vs empty list } #[test] @@ -692,6 +732,64 @@ mod tests { assert_eq!(hashes[0], hashes[1]); } + #[test] + // Tests actual values of hashes, which are different if forcing collisions + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_map_arrays() { + let mut builder = + MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); + // Row 0 + builder.keys().append_value("key1"); + builder.keys().append_value("key2"); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + // Row 1 + builder.keys().append_value("key1"); + builder.keys().append_value("key2"); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + // Row 2 + builder.keys().append_value("key1"); + builder.keys().append_value("key2"); + builder.values().append_value(1); + builder.values().append_value(3); + builder.append(true).unwrap(); + // Row 3 + builder.keys().append_value("key1"); + builder.keys().append_value("key3"); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + // Row 4 + builder.keys().append_value("key1"); + builder.values().append_value(1); + builder.append(true).unwrap(); + // Row 5 + builder.keys().append_value("key1"); + builder.values().append_null(); + builder.append(true).unwrap(); + // Row 6 + builder.append(true).unwrap(); + // Row 7 + builder.keys().append_value("key1"); + builder.values().append_value(1); + builder.append(false).unwrap(); + + let array = Arc::new(builder.finish()) as ArrayRef; + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let mut hashes = vec![0; array.len()]; + create_hashes(&[array], &random_state, &mut hashes).unwrap(); + assert_eq!(hashes[0], hashes[1]); // same value + assert_ne!(hashes[0], hashes[2]); // different value + assert_ne!(hashes[0], hashes[3]); // different key + assert_ne!(hashes[0], hashes[4]); // missing an entry + 
assert_ne!(hashes[4], hashes[5]); // filled vs null value + assert_eq!(hashes[6], hashes[7]); // empty vs null map + } + #[test] // Tests actual values of hashes, which are different if forcing collisions #[cfg(not(feature = "force_hash_collisions"))] diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 0651013901154..92ed897e7185c 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1770,6 +1770,7 @@ impl ScalarValue { } DataType::List(_) | DataType::LargeList(_) + | DataType::Map(_, _) | DataType::Struct(_) | DataType::Union(_, _) => { let arrays = scalars.map(|s| s.to_array()).collect::>>()?; @@ -1838,7 +1839,6 @@ impl ScalarValue { | DataType::Time32(TimeUnit::Nanosecond) | DataType::Time64(TimeUnit::Second) | DataType::Time64(TimeUnit::Millisecond) - | DataType::Map(_, _) | DataType::RunEndEncoded(_, _) | DataType::ListView(_) | DataType::LargeListView(_) => { diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 26bfb4a5922e6..e530e14df66ea 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -302,3 +302,11 @@ SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), a {POST: 41, HEAD: 33, PATCH: 30} {POST: 41, HEAD: 33, PATCH: 30} {POST: 41, HEAD: 33, PATCH: 30} + + +query ? +VALUES (MAP(['a'], [1])), (MAP(['b'], [2])), (MAP(['c', 'a'], [3, 1])) +---- +{a: 1} +{b: 2} +{c: 3, a: 1} diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 5768c44bbf6c8..15c447114819e 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
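The row-wise combination in `hash_map_array` above boils down to folding each row's entry hashes over that row's offset window (null rows are skipped, and in the real code the fold starts from the row's existing hash so that several columns can be hashed together). A minimal standalone sketch of that folding step, with a stand-in for DataFusion's `combine_hashes`:

```rust
/// Fold per-entry hashes into one hash per row using the rows' offset ranges;
/// `offsets` has `num_rows + 1` entries, exactly like a MapArray's offsets.
fn fold_row_hashes(offsets: &[i32], entry_hashes: &[u64]) -> Vec<u64> {
    // Stand-in for DataFusion's `combine_hashes`; any mixing function works here.
    let combine = |l: u64, r: u64| l.wrapping_mul(31).wrapping_add(r);
    offsets
        .windows(2)
        .map(|w| {
            entry_hashes[w[0] as usize..w[1] as usize]
                .iter()
                .fold(0u64, |acc, h| combine(acc, *h))
        })
        .collect()
}

fn main() {
    // Row 0 has two entries, row 1 has none. An entry-less row keeps its seed,
    // which is why the tests above expect an empty map and a null map to hash equally.
    let row_hashes = fold_row_hashes(&[0, 2, 2], &[11, 22]);
    assert_eq!(row_hashes, vec![363, 0]);
}
```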
-use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; +use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, OffsetBuffer}; use async_recursion::async_recursion; -use datafusion::arrow::array::GenericListArray; +use datafusion::arrow::array::{GenericListArray, MapArray}; use datafusion::arrow::datatypes::{ DataType, Field, FieldRef, Fields, IntervalUnit, Schema, TimeUnit, }; @@ -51,6 +51,7 @@ use crate::variation_const::{ INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, }; +use datafusion::arrow::array::{new_empty_array, AsArray}; use datafusion::common::scalar::ScalarStructBuilder; use datafusion::logical_expr::expr::InList; use datafusion::logical_expr::{ @@ -1449,21 +1450,14 @@ fn from_substrait_type( from_substrait_type(value_type, extensions, dfs_names, name_idx)?, true, )); - match map.type_variation_reference { - DEFAULT_CONTAINER_TYPE_VARIATION_REF => { - Ok(DataType::Map( - Arc::new(Field::new_struct( - "entries", - [key_field, value_field], - false, // The inner map field is always non-nullable (Arrow #1697), - )), - false, - )) - } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - )?, - } + Ok(DataType::Map( + Arc::new(Field::new_struct( + "entries", + [key_field, value_field], + false, // The inner map field is always non-nullable (Arrow #1697), + )), + false, // whether keys are sorted + )) } r#type::Kind::Decimal(d) => match d.type_variation_reference { DECIMAL_128_TYPE_VARIATION_REF => { @@ -1743,11 +1737,23 @@ fn from_substrait_literal( ) } Some(LiteralType::List(l)) => { + // Each element should start the name index from the same value, then we increase it + // once at the end + let mut element_name_idx = *name_idx; let elements = l .values .iter() - .map(|el| from_substrait_literal(el, extensions, dfs_names, name_idx)) + .map(|el| { + element_name_idx = *name_idx; + from_substrait_literal( + el, + extensions, + dfs_names, + &mut element_name_idx, + ) + }) .collect::>>()?; + *name_idx = element_name_idx; if elements.is_empty() { return substrait_err!( "Empty list must be encoded as EmptyList literal type, not List" @@ -1785,6 +1791,84 @@ fn from_substrait_literal( } } } + Some(LiteralType::Map(m)) => { + // Each entry should start the name index from the same value, then we increase it + // once at the end + let mut entry_name_idx = *name_idx; + let entries = m + .key_values + .iter() + .map(|kv| { + entry_name_idx = *name_idx; + let key_sv = from_substrait_literal( + kv.key.as_ref().unwrap(), + extensions, + dfs_names, + &mut entry_name_idx, + )?; + let value_sv = from_substrait_literal( + kv.value.as_ref().unwrap(), + extensions, + dfs_names, + &mut entry_name_idx, + )?; + ScalarStructBuilder::new() + .with_scalar(Field::new("key", key_sv.data_type(), false), key_sv) + .with_scalar( + Field::new("value", value_sv.data_type(), true), + value_sv, + ) + .build() + }) + .collect::>>()?; + *name_idx = entry_name_idx; + + if entries.is_empty() { + return substrait_err!( + "Empty map must be encoded as EmptyMap literal type, not Map" + ); + } + + ScalarValue::Map(Arc::new(MapArray::new( + Arc::new(Field::new("entries", entries[0].data_type(), false)), + OffsetBuffer::new(vec![0, entries.len() as i32].into()), + ScalarValue::iter_to_array(entries)?.as_struct().to_owned(), + None, + false, + ))) + } + Some(LiteralType::EmptyMap(m)) => { + let key = match &m.key { + Some(k) => Ok(k), + _ => plan_err!("Missing key type for empty map"), + }?; + let value = match &m.value { + Some(v) => Ok(v), 
+ _ => plan_err!("Missing value type for empty map"), + }?; + let key_type = from_substrait_type(key, extensions, dfs_names, name_idx)?; + let value_type = from_substrait_type(value, extensions, dfs_names, name_idx)?; + + // new_empty_array on a MapType creates a too empty array + // We want it to contain an empty struct array to align with an empty MapBuilder one + let entries = Field::new_struct( + "entries", + vec![ + Field::new("key", key_type, false), + Field::new("value", value_type, true), + ], + false, + ); + let struct_array = + new_empty_array(entries.data_type()).as_struct().to_owned(); + ScalarValue::Map(Arc::new(MapArray::new( + Arc::new(entries), + OffsetBuffer::new(vec![0, 0].into()), + struct_array, + None, + false, + ))) + } Some(LiteralType::Struct(s)) => { let mut builder = ScalarStructBuilder::new(); for (i, field) in s.fields.iter().enumerate() { @@ -2013,6 +2097,29 @@ fn from_substrait_null( ), } } + r#type::Kind::Map(map) => { + let key_type = map.key.as_ref().ok_or_else(|| { + substrait_datafusion_err!("Map type must have key type") + })?; + let value_type = map.value.as_ref().ok_or_else(|| { + substrait_datafusion_err!("Map type must have value type") + })?; + + let key_type = + from_substrait_type(key_type, extensions, dfs_names, name_idx)?; + let value_type = + from_substrait_type(value_type, extensions, dfs_names, name_idx)?; + let entries_field = Arc::new(Field::new_struct( + "entries", + vec![ + Field::new("key", key_type, false), + Field::new("value", value_type, true), + ], + false, + )); + + DataType::Map(entries_field, false /* keys sorted */).try_into() + } r#type::Kind::Struct(s) => { let fields = from_substrait_struct_type(s, extensions, dfs_names, name_idx)?; diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 8f69cc5e218f6..8263209ffccc7 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -57,8 +57,10 @@ use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Opera use datafusion::prelude::Expr; use pbjson_types::Any as ProtoAny; use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields}; +use substrait::proto::expression::literal::map::KeyValue; use substrait::proto::expression::literal::{ - user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Struct, UserDefined, + user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map, Struct, + UserDefined, }; use substrait::proto::expression::subquery::InPredicate; use substrait::proto::expression::window_function::BoundsType; @@ -1922,6 +1924,48 @@ fn to_substrait_literal( convert_array_to_literal_list(l, extensions)?, LARGE_CONTAINER_TYPE_VARIATION_REF, ), + ScalarValue::Map(m) => { + let map = if m.is_empty() || m.value(0).is_empty() { + let mt = to_substrait_type(m.data_type(), m.is_nullable(), extensions)?; + let mt = match mt { + substrait::proto::Type { + kind: Some(r#type::Kind::Map(mt)), + } => Ok(mt.as_ref().to_owned()), + _ => exec_err!("Unexpected type for a map: {mt:?}"), + }?; + LiteralType::EmptyMap(mt) + } else { + let keys = (0..m.keys().len()) + .map(|i| { + to_substrait_literal( + &ScalarValue::try_from_array(&m.keys(), i)?, + extensions, + ) + }) + .collect::>>()?; + let values = (0..m.values().len()) + .map(|i| { + to_substrait_literal( + &ScalarValue::try_from_array(&m.values(), i)?, + extensions, + ) + }) + .collect::>>()?; + + let key_values = keys + .into_iter() + .zip(values.into_iter()) + 
.map(|(k, v)| { + Ok(KeyValue { + key: Some(k), + value: Some(v), + }) + }) + .collect::>>()?; + LiteralType::Map(Map { key_values }) + }; + (map, DEFAULT_CONTAINER_TYPE_VARIATION_REF) + } ScalarValue::Struct(s) => ( LiteralType::Struct(Struct { fields: s @@ -1967,7 +2011,7 @@ fn convert_array_to_literal_list( .collect::>>()?; if values.is_empty() { - let et = match to_substrait_type( + let lt = match to_substrait_type( array.data_type(), array.is_nullable(), extensions, @@ -1977,7 +2021,7 @@ fn convert_array_to_literal_list( } => lt.as_ref().to_owned(), _ => unreachable!(), }; - Ok(LiteralType::EmptyList(et)) + Ok(LiteralType::EmptyList(lt)) } else { Ok(LiteralType::List(List { values })) } @@ -2094,7 +2138,9 @@ mod test { from_substrait_literal_without_names, from_substrait_type_without_names, }; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; - use datafusion::arrow::array::GenericListArray; + use datafusion::arrow::array::{ + GenericListArray, Int64Builder, MapBuilder, StringBuilder, + }; use datafusion::arrow::datatypes::Field; use datafusion::common::scalar::ScalarStructBuilder; use std::collections::HashMap; @@ -2160,6 +2206,28 @@ mod test { ), )))?; + // Null map + let mut map_builder = + MapBuilder::new(None, StringBuilder::new(), Int64Builder::new()); + map_builder.append(false)?; + round_trip_literal(ScalarValue::Map(Arc::new(map_builder.finish())))?; + + // Empty map + let mut map_builder = + MapBuilder::new(None, StringBuilder::new(), Int64Builder::new()); + map_builder.append(true)?; + round_trip_literal(ScalarValue::Map(Arc::new(map_builder.finish())))?; + + // Valid map + let mut map_builder = + MapBuilder::new(None, StringBuilder::new(), Int64Builder::new()); + map_builder.keys().append_value("key1"); + map_builder.keys().append_value("key2"); + map_builder.values().append_value(1); + map_builder.values().append_value(2); + map_builder.append(true)?; + round_trip_literal(ScalarValue::Map(Arc::new(map_builder.finish())))?; + let c0 = Field::new("c0", DataType::Boolean, true); let c1 = Field::new("c1", DataType::Int32, true); let c2 = Field::new("c2", DataType::Utf8, true); diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 5b4389c832c7c..439e3efa29228 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -749,7 +749,7 @@ async fn roundtrip_values() -> Result<()> { [[-213.1, NULL, 5.5, 2.0, 1.0], []], \ arrow_cast([1,2,3], 'LargeList(Int64)'), \ STRUCT(true, 1 AS int_field, CAST(NULL AS STRING)), \ - [STRUCT(STRUCT('a' AS string_field) AS struct_field)]\ + [STRUCT(STRUCT('a' AS string_field) AS struct_field), STRUCT(STRUCT('b' AS string_field) AS struct_field)]\ ), \ (NULL, NULL, NULL, NULL, NULL, NULL)", "Values: \ @@ -759,7 +759,7 @@ async fn roundtrip_values() -> Result<()> { List([[-213.1, , 5.5, 2.0, 1.0], []]), \ LargeList([1, 2, 3]), \ Struct({c0:true,int_field:1,c2:}), \ - List([{struct_field: {string_field: a}}])\ + List([{struct_field: {string_field: a}}, {struct_field: {string_field: b}}])\ ), \ (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List())", true).await From f9ee64a0ec98bfc5407fa539d073bb38230faa44 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:34:45 -0700 Subject: [PATCH 121/357] Fix Internal Error for an INNER JOIN query (#11578) * Fix Internal Error for an INNER JOIN query * 
Fix fmt * Fix comment --- datafusion/expr/src/expr.rs | 20 ++++++++++ datafusion/expr/src/logical_plan/plan.rs | 8 ++-- datafusion/sqllogictest/test_files/join.slt | 42 +++++++++++++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index e3620501d9a8f..452c05be34f49 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1340,6 +1340,9 @@ impl Expr { /// /// returns `None` if the expression is not a `Column` /// + /// Note: None may be returned for expressions that are not `Column` but + /// are convertible to `Column` such as `Cast` expressions. + /// /// Example /// ``` /// # use datafusion_common::Column; @@ -1358,6 +1361,23 @@ impl Expr { } } + /// Returns the inner `Column` if any. This is a specialized version of + /// [`Self::try_as_col`] that take Cast expressions into account when the + /// expression is as on condition for joins. + /// + /// Called this method when you are sure that the expression is a `Column` + /// or a `Cast` expression that wraps a `Column`. + pub fn get_as_join_column(&self) -> Option<&Column> { + match self { + Expr::Column(c) => Some(c), + Expr::Cast(Cast { expr, .. }) => match &**expr { + Expr::Column(c) => Some(c), + _ => None, + }, + _ => None, + } + } + /// Return all referenced columns of this expression. #[deprecated(since = "40.0.0", note = "use Expr::column_refs instead")] pub fn to_columns(&self) -> Result> { diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 48fa6270b2027..d4fe233cac06e 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -496,18 +496,18 @@ impl LogicalPlan { // The join keys in using-join must be columns. let columns = on.iter().try_fold(HashSet::new(), |mut accumu, (l, r)| { - let Some(l) = l.try_as_col().cloned() else { + let Some(l) = l.get_as_join_column() else { return internal_err!( "Invalid join key. Expected column, found {l:?}" ); }; - let Some(r) = r.try_as_col().cloned() else { + let Some(r) = r.get_as_join_column() else { return internal_err!( "Invalid join key. 
Expected column, found {r:?}" ); }; - accumu.insert(l); - accumu.insert(r); + accumu.insert(l.to_owned()); + accumu.insert(r.to_owned()); Result::<_, DataFusionError>::Ok(accumu) })?; using_columns.push(columns); diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index efebba1779cf7..4f01d2b2c72bf 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1054,3 +1054,45 @@ DROP TABLE t1; statement ok DROP TABLE t2; + +# Join Using Issue with Cast Expr +# Found issue: https://github.com/apache/datafusion/issues/11412 + +statement ok +/*DML*/CREATE TABLE t60(v0 BIGINT, v1 BIGINT, v2 BOOLEAN, v3 BOOLEAN); + +statement ok +/*DML*/CREATE TABLE t0(v0 DOUBLE, v1 BIGINT); + +statement ok +/*DML*/CREATE TABLE t1(v0 DOUBLE); + +query I +SELECT COUNT(*) +FROM t1 +NATURAL JOIN t60 +INNER JOIN t0 +ON t60.v1 = t0.v0 +AND t0.v1 > t60.v1; +---- +0 + +query I +SELECT COUNT(*) +FROM t1 +JOIN t60 +USING (v0) +INNER JOIN t0 +ON t60.v1 = t0.v0 +AND t0.v1 > t60.v1; +---- +0 + +statement ok +DROP TABLE t60; + +statement ok +DROP TABLE t0; + +statement ok +DROP TABLE t1; \ No newline at end of file From 5f0ab3100f7aec71891acd7ecd3415e142d4ea78 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Wed, 24 Jul 2024 01:36:42 +0800 Subject: [PATCH 122/357] test: get file size by func metadata (#11575) --- datafusion/core/src/datasource/file_format/csv.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 185f50883b2ce..958d2694aa04a 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -1301,11 +1301,8 @@ mod tests { "+-----------------------+", "| 50 |", "+-----------------------+"]; - let file_size = if cfg!(target_os = "windows") { - 30 // new line on Win is '\r\n' - } else { - 20 - }; + + let file_size = std::fs::metadata("tests/data/one_col.csv")?.len() as usize; // A 20-Byte file at most get partitioned into 20 chunks let expected_partitions = if n_partitions <= file_size { n_partitions From 268be4553306ae0b85a34c8eb2f44e2c381a49db Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Wed, 24 Jul 2024 01:46:06 +0800 Subject: [PATCH 123/357] Update scalar_functions.md (#11620) --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d2e012cf4093d..561824772af8c 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -772,7 +772,7 @@ concat(str[, ..., str_n]) Concatenates multiple strings together with a specified separator. 
``` -concat(separator, str[, ..., str_n]) +concat_ws(separator, str[, ..., str_n]) ``` #### Arguments From 65dd364be438c2febb10f68efab0fd7a63586055 Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Tue, 23 Jul 2024 10:49:43 -0700 Subject: [PATCH 124/357] Improve unparser MySQL compatibility (#11589) * Configurable date field extraction style for unparsing (#21) * Add support for IntervalStyle::MySQL (#18) * Support alternate format for Int64 unparsing (SIGNED for MySQL) (#22) * Alternate format support for Timestamp casting (DATETIME for MySQL) (#23) * Improve * Fix clippy and docs --- datafusion/sql/src/unparser/dialect.rs | 155 +++++++++- datafusion/sql/src/unparser/expr.rs | 397 ++++++++++++++++++++++--- 2 files changed, 507 insertions(+), 45 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index ed0cfddc38273..7eca326386fc5 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -15,8 +15,14 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + +use arrow_schema::TimeUnit; use regex::Regex; -use sqlparser::{ast, keywords::ALL_KEYWORDS}; +use sqlparser::{ + ast::{self, Ident, ObjectName, TimezoneInfo}, + keywords::ALL_KEYWORDS, +}; /// `Dialect` to use for Unparsing /// @@ -36,8 +42,8 @@ pub trait Dialect: Send + Sync { true } - // Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME? - // E.g. Trino, Athena and Dremio does not have DATETIME data type + /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME? + /// E.g. Trino, Athena and Dremio does not have DATETIME data type fn use_timestamp_for_date64(&self) -> bool { false } @@ -46,23 +52,50 @@ pub trait Dialect: Send + Sync { IntervalStyle::PostgresVerbose } - // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE? - // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE + /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE? + /// E.g. 
Postgres uses DOUBLE PRECISION instead of DOUBLE fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { sqlparser::ast::DataType::Double } - // The SQL type to use for Arrow Utf8 unparsing - // Most dialects use VARCHAR, but some, like MySQL, require CHAR + /// The SQL type to use for Arrow Utf8 unparsing + /// Most dialects use VARCHAR, but some, like MySQL, require CHAR fn utf8_cast_dtype(&self) -> ast::DataType { ast::DataType::Varchar(None) } - // The SQL type to use for Arrow LargeUtf8 unparsing - // Most dialects use TEXT, but some, like MySQL, require CHAR + /// The SQL type to use for Arrow LargeUtf8 unparsing + /// Most dialects use TEXT, but some, like MySQL, require CHAR fn large_utf8_cast_dtype(&self) -> ast::DataType { ast::DataType::Text } + + /// The date field extract style to use: `DateFieldExtractStyle` + fn date_field_extract_style(&self) -> DateFieldExtractStyle { + DateFieldExtractStyle::DatePart + } + + /// The SQL type to use for Arrow Int64 unparsing + /// Most dialects use BigInt, but some, like MySQL, require SIGNED + fn int64_cast_dtype(&self) -> ast::DataType { + ast::DataType::BigInt(None) + } + + /// The SQL type to use for Timestamp unparsing + /// Most dialects use Timestamp, but some, like MySQL, require Datetime + /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp + fn timestamp_cast_dtype( + &self, + _time_unit: &TimeUnit, + tz: &Option>, + ) -> ast::DataType { + let tz_info = match tz { + Some(_) => TimezoneInfo::WithTimeZone, + None => TimezoneInfo::None, + }; + + ast::DataType::Timestamp(None, tz_info) + } } /// `IntervalStyle` to use for unparsing @@ -80,6 +113,19 @@ pub enum IntervalStyle { MySQL, } +/// Datetime subfield extraction style for unparsing +/// +/// `` +/// Different DBMSs follow different standards; popular ones are: +/// date_part('YEAR', date '2001-02-16') +/// EXTRACT(YEAR from date '2001-02-16') +/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT. 
+#[derive(Clone, Copy, PartialEq)] +pub enum DateFieldExtractStyle { + DatePart, + Extract, +} + pub struct DefaultDialect {} impl Dialect for DefaultDialect { @@ -133,6 +179,22 @@ impl Dialect for MySqlDialect { fn large_utf8_cast_dtype(&self) -> ast::DataType { ast::DataType::Char(None) } + + fn date_field_extract_style(&self) -> DateFieldExtractStyle { + DateFieldExtractStyle::Extract + } + + fn int64_cast_dtype(&self) -> ast::DataType { + ast::DataType::Custom(ObjectName(vec![Ident::new("SIGNED")]), vec![]) + } + + fn timestamp_cast_dtype( + &self, + _time_unit: &TimeUnit, + _tz: &Option>, + ) -> ast::DataType { + ast::DataType::Datetime(None) + } } pub struct SqliteDialect {} @@ -151,6 +213,10 @@ pub struct CustomDialect { float64_ast_dtype: sqlparser::ast::DataType, utf8_cast_dtype: ast::DataType, large_utf8_cast_dtype: ast::DataType, + date_field_extract_style: DateFieldExtractStyle, + int64_cast_dtype: ast::DataType, + timestamp_cast_dtype: ast::DataType, + timestamp_tz_cast_dtype: ast::DataType, } impl Default for CustomDialect { @@ -163,6 +229,13 @@ impl Default for CustomDialect { float64_ast_dtype: sqlparser::ast::DataType::Double, utf8_cast_dtype: ast::DataType::Varchar(None), large_utf8_cast_dtype: ast::DataType::Text, + date_field_extract_style: DateFieldExtractStyle::DatePart, + int64_cast_dtype: ast::DataType::BigInt(None), + timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None), + timestamp_tz_cast_dtype: ast::DataType::Timestamp( + None, + TimezoneInfo::WithTimeZone, + ), } } } @@ -206,6 +279,26 @@ impl Dialect for CustomDialect { fn large_utf8_cast_dtype(&self) -> ast::DataType { self.large_utf8_cast_dtype.clone() } + + fn date_field_extract_style(&self) -> DateFieldExtractStyle { + self.date_field_extract_style + } + + fn int64_cast_dtype(&self) -> ast::DataType { + self.int64_cast_dtype.clone() + } + + fn timestamp_cast_dtype( + &self, + _time_unit: &TimeUnit, + tz: &Option>, + ) -> ast::DataType { + if tz.is_some() { + self.timestamp_tz_cast_dtype.clone() + } else { + self.timestamp_cast_dtype.clone() + } + } } /// `CustomDialectBuilder` to build `CustomDialect` using builder pattern @@ -230,6 +323,10 @@ pub struct CustomDialectBuilder { float64_ast_dtype: sqlparser::ast::DataType, utf8_cast_dtype: ast::DataType, large_utf8_cast_dtype: ast::DataType, + date_field_extract_style: DateFieldExtractStyle, + int64_cast_dtype: ast::DataType, + timestamp_cast_dtype: ast::DataType, + timestamp_tz_cast_dtype: ast::DataType, } impl Default for CustomDialectBuilder { @@ -248,6 +345,13 @@ impl CustomDialectBuilder { float64_ast_dtype: sqlparser::ast::DataType::Double, utf8_cast_dtype: ast::DataType::Varchar(None), large_utf8_cast_dtype: ast::DataType::Text, + date_field_extract_style: DateFieldExtractStyle::DatePart, + int64_cast_dtype: ast::DataType::BigInt(None), + timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None), + timestamp_tz_cast_dtype: ast::DataType::Timestamp( + None, + TimezoneInfo::WithTimeZone, + ), } } @@ -260,6 +364,10 @@ impl CustomDialectBuilder { float64_ast_dtype: self.float64_ast_dtype, utf8_cast_dtype: self.utf8_cast_dtype, large_utf8_cast_dtype: self.large_utf8_cast_dtype, + date_field_extract_style: self.date_field_extract_style, + int64_cast_dtype: self.int64_cast_dtype, + timestamp_cast_dtype: self.timestamp_cast_dtype, + timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype, } } @@ -293,6 +401,7 @@ impl CustomDialectBuilder { self } + /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, 
DOUBLE PRECISION, etc. pub fn with_float64_ast_dtype( mut self, float64_ast_dtype: sqlparser::ast::DataType, @@ -301,11 +410,13 @@ impl CustomDialectBuilder { self } + /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc. pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { self.utf8_cast_dtype = utf8_cast_dtype; self } + /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc. pub fn with_large_utf8_cast_dtype( mut self, large_utf8_cast_dtype: ast::DataType, @@ -313,4 +424,30 @@ impl CustomDialectBuilder { self.large_utf8_cast_dtype = large_utf8_cast_dtype; self } + + /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle` + pub fn with_date_field_extract_style( + mut self, + date_field_extract_style: DateFieldExtractStyle, + ) -> Self { + self.date_field_extract_style = date_field_extract_style; + self + } + + /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc. + pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self { + self.int64_cast_dtype = int64_cast_dtype; + self + } + + /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc. + pub fn with_timestamp_cast_dtype( + mut self, + timestamp_cast_dtype: ast::DataType, + timestamp_tz_cast_dtype: ast::DataType, + ) -> Self { + self.timestamp_cast_dtype = timestamp_cast_dtype; + self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype; + self + } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 2f7854c1a183c..f4ea44f37d788 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -16,6 +16,13 @@ // under the License. 
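The builder additions above are exercised by the unit tests later in this diff; a condensed sketch of that usage, assuming the `datafusion_sql::unparser` module paths and the crate's public re-exports (the output in the final comment matches the test expectation for the `SIGNED` cast):

```rust
use arrow_schema::DataType;
use datafusion_common::Result;
use datafusion_expr::{col, Cast, Expr};
use datafusion_sql::unparser::dialect::{CustomDialectBuilder, DateFieldExtractStyle};
use datafusion_sql::unparser::Unparser;
use sqlparser::ast::{self, Ident, ObjectName};

fn main() -> Result<()> {
    // A MySQL-flavoured dialect: EXTRACT(...) for date parts, SIGNED for Int64
    // casts, DATETIME for timestamp casts (with or without a time zone).
    let dialect = CustomDialectBuilder::new()
        .with_date_field_extract_style(DateFieldExtractStyle::Extract)
        .with_int64_cast_dtype(ast::DataType::Custom(
            ObjectName(vec![Ident::new("SIGNED")]),
            vec![],
        ))
        .with_timestamp_cast_dtype(
            ast::DataType::Datetime(None),
            ast::DataType::Datetime(None),
        )
        .build();

    let unparser = Unparser::new(&dialect);
    let expr = Expr::Cast(Cast {
        expr: Box::new(col("a")),
        data_type: DataType::Int64,
    });
    // Prints: CAST(a AS SIGNED)
    println!("{}", unparser.expr_to_sql(&expr)?);
    Ok(())
}
```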
use core::fmt; + +use datafusion_expr::ScalarUDF; +use sqlparser::ast::Value::SingleQuotedString; +use sqlparser::ast::{ + self, BinaryOperator, Expr as AstExpr, Function, FunctionArg, Ident, Interval, + TimezoneInfo, UnaryOperator, +}; use std::sync::Arc; use std::{fmt::Display, vec}; @@ -28,12 +35,6 @@ use arrow_array::types::{ }; use arrow_array::{Date32Array, Date64Array, PrimitiveArray}; use arrow_schema::DataType; -use sqlparser::ast::Value::SingleQuotedString; -use sqlparser::ast::{ - self, BinaryOperator, Expr as AstExpr, Function, FunctionArg, Ident, Interval, - TimezoneInfo, UnaryOperator, -}; - use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, Result, ScalarValue, @@ -43,7 +44,7 @@ use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; -use super::dialect::IntervalStyle; +use super::dialect::{DateFieldExtractStyle, IntervalStyle}; use super::Unparser; /// DataFusion's Exprs can represent either an `Expr` or an `OrderByExpr` @@ -149,6 +150,12 @@ impl Unparser<'_> { Expr::ScalarFunction(ScalarFunction { func, args }) => { let func_name = func.name(); + if let Some(expr) = + self.scalar_function_to_sql_overrides(func_name, func, args) + { + return Ok(expr); + } + let args = args .iter() .map(|e| { @@ -545,6 +552,38 @@ impl Unparser<'_> { } } + fn scalar_function_to_sql_overrides( + &self, + func_name: &str, + _func: &Arc, + args: &[Expr], + ) -> Option { + if func_name.to_lowercase() == "date_part" + && self.dialect.date_field_extract_style() == DateFieldExtractStyle::Extract + && args.len() == 2 + { + let date_expr = self.expr_to_sql(&args[1]).ok()?; + + if let Expr::Literal(ScalarValue::Utf8(Some(field))) = &args[0] { + let field = match field.to_lowercase().as_str() { + "year" => ast::DateTimeField::Year, + "month" => ast::DateTimeField::Month, + "day" => ast::DateTimeField::Day, + "hour" => ast::DateTimeField::Hour, + "minute" => ast::DateTimeField::Minute, + "second" => ast::DateTimeField::Second, + _ => return None, + }; + + return Some(ast::Expr::Extract { + field, + expr: Box::new(date_expr), + }); + } + } + None + } + fn ast_type_for_date64_in_cast(&self) -> ast::DataType { if self.dialect.use_timestamp_for_date64() { ast::DataType::Timestamp(None, ast::TimezoneInfo::None) @@ -1105,6 +1144,131 @@ impl Unparser<'_> { } } + /// MySQL requires INTERVAL sql to be in the format: INTERVAL 1 YEAR + INTERVAL 1 MONTH + INTERVAL 1 DAY etc + /// `` + /// Interval sequence can't be wrapped in brackets - (INTERVAL 1 YEAR + INTERVAL 1 MONTH ...) 
so we need to generate + /// a single INTERVAL expression so it works correct for interval substraction cases + /// MySQL supports the DAY_MICROSECOND unit type (format is DAYS HOURS:MINUTES:SECONDS.MICROSECONDS), but it is not supported by sqlparser + /// so we calculate the best single interval to represent the provided duration + fn interval_to_mysql_expr( + &self, + months: i32, + days: i32, + microseconds: i64, + ) -> Result { + // MONTH only + if months != 0 && days == 0 && microseconds == 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + months.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Month), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } else if months != 0 { + return not_impl_err!("Unsupported Interval scalar with both Month and DayTime for IntervalStyle::MySQL"); + } + + // DAY only + if microseconds == 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + days.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } + + // calculate the best single interval to represent the provided days and microseconds + + let microseconds = microseconds + (days as i64 * 24 * 60 * 60 * 1_000_000); + + if microseconds % 1_000_000 != 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + microseconds.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Microsecond), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } + + let secs = microseconds / 1_000_000; + + if secs % 60 != 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + secs.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Second), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } + + let mins = secs / 60; + + if mins % 60 != 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + mins.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Minute), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } + + let hours = mins / 60; + + if hours % 24 != 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + hours.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Hour), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + return Ok(ast::Expr::Interval(interval)); + } + + let days = hours / 24; + + let interval = Interval { + value: Box::new(ast::Expr::Value(ast::Value::Number( + days.to_string(), + false, + ))), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + fn interval_scalar_to_sql(&self, v: &ScalarValue) -> Result { match self.dialect.interval_style() { IntervalStyle::PostgresVerbose => { @@ -1127,10 +1291,7 @@ impl Unparser<'_> { } // If the interval standard is SQLStandard, implement a simple unparse logic IntervalStyle::SQLStandard => match v { - 
ScalarValue::IntervalYearMonth(v) => { - let Some(v) = v else { - return Ok(ast::Expr::Value(ast::Value::Null)); - }; + ScalarValue::IntervalYearMonth(Some(v)) => { let interval = Interval { value: Box::new(ast::Expr::Value( ast::Value::SingleQuotedString(v.to_string()), @@ -1142,10 +1303,7 @@ impl Unparser<'_> { }; Ok(ast::Expr::Interval(interval)) } - ScalarValue::IntervalDayTime(v) => { - let Some(v) = v else { - return Ok(ast::Expr::Value(ast::Value::Null)); - }; + ScalarValue::IntervalDayTime(Some(v)) => { let days = v.days; let secs = v.milliseconds / 1_000; let mins = secs / 60; @@ -1168,11 +1326,7 @@ impl Unparser<'_> { }; Ok(ast::Expr::Interval(interval)) } - ScalarValue::IntervalMonthDayNano(v) => { - let Some(v) = v else { - return Ok(ast::Expr::Value(ast::Value::Null)); - }; - + ScalarValue::IntervalMonthDayNano(Some(v)) => { if v.months >= 0 && v.days == 0 && v.nanoseconds == 0 { let interval = Interval { value: Box::new(ast::Expr::Value( @@ -1184,10 +1338,7 @@ impl Unparser<'_> { fractional_seconds_precision: None, }; Ok(ast::Expr::Interval(interval)) - } else if v.months == 0 - && v.days >= 0 - && v.nanoseconds % 1_000_000 == 0 - { + } else if v.months == 0 && v.nanoseconds % 1_000_000 == 0 { let days = v.days; let secs = v.nanoseconds / 1_000_000_000; let mins = secs / 60; @@ -1214,11 +1365,29 @@ impl Unparser<'_> { not_impl_err!("Unsupported IntervalMonthDayNano scalar with both Month and DayTime for IntervalStyle::SQLStandard") } } - _ => Ok(ast::Expr::Value(ast::Value::Null)), + _ => not_impl_err!( + "Unsupported ScalarValue for Interval conversion: {v:?}" + ), + }, + IntervalStyle::MySQL => match v { + ScalarValue::IntervalYearMonth(Some(v)) => { + self.interval_to_mysql_expr(*v, 0, 0) + } + ScalarValue::IntervalDayTime(Some(v)) => { + self.interval_to_mysql_expr(0, v.days, v.milliseconds as i64 * 1_000) + } + ScalarValue::IntervalMonthDayNano(Some(v)) => { + if v.nanoseconds % 1_000 != 0 { + return not_impl_err!( + "Unsupported IntervalMonthDayNano scalar with nanoseconds precision for IntervalStyle::MySQL" + ); + } + self.interval_to_mysql_expr(v.months, v.days, v.nanoseconds / 1_000) + } + _ => not_impl_err!( + "Unsupported ScalarValue for Interval conversion: {v:?}" + ), }, - IntervalStyle::MySQL => { - not_impl_err!("Unsupported interval scalar for IntervalStyle::MySQL") - } } } @@ -1231,7 +1400,7 @@ impl Unparser<'_> { DataType::Int8 => Ok(ast::DataType::TinyInt(None)), DataType::Int16 => Ok(ast::DataType::SmallInt(None)), DataType::Int32 => Ok(ast::DataType::Integer(None)), - DataType::Int64 => Ok(ast::DataType::BigInt(None)), + DataType::Int64 => Ok(self.dialect.int64_cast_dtype()), DataType::UInt8 => Ok(ast::DataType::UnsignedTinyInt(None)), DataType::UInt16 => Ok(ast::DataType::UnsignedSmallInt(None)), DataType::UInt32 => Ok(ast::DataType::UnsignedInteger(None)), @@ -1241,13 +1410,8 @@ impl Unparser<'_> { } DataType::Float32 => Ok(ast::DataType::Float(None)), DataType::Float64 => Ok(self.dialect.float64_ast_dtype()), - DataType::Timestamp(_, tz) => { - let tz_info = match tz { - Some(_) => TimezoneInfo::WithTimeZone, - None => TimezoneInfo::None, - }; - - Ok(ast::DataType::Timestamp(None, tz_info)) + DataType::Timestamp(time_unit, tz) => { + Ok(self.dialect.timestamp_cast_dtype(time_unit, tz)) } DataType::Date32 => Ok(ast::DataType::Date), DataType::Date64 => Ok(self.ast_type_for_date64_in_cast()), @@ -1335,6 +1499,7 @@ mod tests { use arrow::datatypes::TimeUnit; use arrow::datatypes::{Field, Schema}; use arrow_schema::DataType::Int8; + use ast::ObjectName; 
use datafusion_common::TableReference; use datafusion_expr::{ case, col, cube, exists, grouping_set, interval_datetime_lit, @@ -1885,6 +2050,11 @@ mod tests { IntervalStyle::SQLStandard, "INTERVAL '1 12:0:0.000' DAY TO SECOND", ), + ( + interval_month_day_nano_lit("-1.5 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '-1 -12:0:0.000' DAY TO SECOND", + ), ( interval_month_day_nano_lit("1.51234 DAY"), IntervalStyle::SQLStandard, @@ -1949,6 +2119,46 @@ mod tests { IntervalStyle::PostgresVerbose, r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, ), + ( + interval_year_month_lit("1 YEAR 1 MONTH"), + IntervalStyle::MySQL, + r#"INTERVAL 13 MONTH"#, + ), + ( + interval_month_day_nano_lit("1 YEAR -1 MONTH"), + IntervalStyle::MySQL, + r#"INTERVAL 11 MONTH"#, + ), + ( + interval_month_day_nano_lit("15 DAY"), + IntervalStyle::MySQL, + r#"INTERVAL 15 DAY"#, + ), + ( + interval_month_day_nano_lit("-40 HOURS"), + IntervalStyle::MySQL, + r#"INTERVAL -40 HOUR"#, + ), + ( + interval_datetime_lit("-1.5 DAY 1 HOUR"), + IntervalStyle::MySQL, + "INTERVAL -35 HOUR", + ), + ( + interval_datetime_lit("1000000 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), + IntervalStyle::MySQL, + r#"INTERVAL 86400006020 SECOND"#, + ), + ( + interval_year_month_lit("0 DAY 0 HOUR"), + IntervalStyle::MySQL, + r#"INTERVAL 0 DAY"#, + ), + ( + interval_month_day_nano_lit("-1296000000 SECOND"), + IntervalStyle::MySQL, + r#"INTERVAL -15000 DAY"#, + ), ]; for (value, style, expected) in tests { @@ -1994,4 +2204,119 @@ mod tests { } Ok(()) } + + #[test] + fn custom_dialect_with_date_field_extract_style() -> Result<()> { + for (extract_style, unit, expected) in [ + ( + DateFieldExtractStyle::DatePart, + "YEAR", + "date_part('YEAR', x)", + ), + ( + DateFieldExtractStyle::Extract, + "YEAR", + "EXTRACT(YEAR FROM x)", + ), + ( + DateFieldExtractStyle::DatePart, + "MONTH", + "date_part('MONTH', x)", + ), + ( + DateFieldExtractStyle::Extract, + "MONTH", + "EXTRACT(MONTH FROM x)", + ), + ( + DateFieldExtractStyle::DatePart, + "DAY", + "date_part('DAY', x)", + ), + (DateFieldExtractStyle::Extract, "DAY", "EXTRACT(DAY FROM x)"), + ] { + let dialect = CustomDialectBuilder::new() + .with_date_field_extract_style(extract_style) + .build(); + + let unparser = Unparser::new(&dialect); + let expr = ScalarUDF::new_from_impl( + datafusion_functions::datetime::date_part::DatePartFunc::new(), + ) + .call(vec![Expr::Literal(ScalarValue::new_utf8(unit)), col("x")]); + + let ast = unparser.expr_to_sql(&expr)?; + let actual = format!("{}", ast); + + assert_eq!(actual, expected); + } + Ok(()) + } + + #[test] + fn custom_dialect_with_int64_cast_dtype() -> Result<()> { + let default_dialect = CustomDialectBuilder::new().build(); + let mysql_dialect = CustomDialectBuilder::new() + .with_int64_cast_dtype(ast::DataType::Custom( + ObjectName(vec![Ident::new("SIGNED")]), + vec![], + )) + .build(); + + for (dialect, identifier) in + [(default_dialect, "BIGINT"), (mysql_dialect, "SIGNED")] + { + let unparser = Unparser::new(&dialect); + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Int64, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + let expected = format!(r#"CAST(a AS {identifier})"#); + + assert_eq!(actual, expected); + } + Ok(()) + } + + #[test] + fn custom_dialect_with_teimstamp_cast_dtype() -> Result<()> { + let default_dialect = CustomDialectBuilder::new().build(); + let mysql_dialect = CustomDialectBuilder::new() + .with_timestamp_cast_dtype( + ast::DataType::Datetime(None), + 
ast::DataType::Datetime(None), + ) + .build(); + + let timestamp = DataType::Timestamp(TimeUnit::Nanosecond, None); + let timestamp_with_tz = + DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())); + + for (dialect, data_type, identifier) in [ + (&default_dialect, ×tamp, "TIMESTAMP"), + ( + &default_dialect, + ×tamp_with_tz, + "TIMESTAMP WITH TIME ZONE", + ), + (&mysql_dialect, ×tamp, "DATETIME"), + (&mysql_dialect, ×tamp_with_tz, "DATETIME"), + ] { + let unparser = Unparser::new(dialect); + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: data_type.clone(), + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + let expected = format!(r#"CAST(a AS {identifier})"#); + + assert_eq!(actual, expected); + } + Ok(()) + } } From 9f74dcc999f648132aca914bc120527e9cf97d48 Mon Sep 17 00:00:00 2001 From: wiedld Date: Tue, 23 Jul 2024 10:56:47 -0700 Subject: [PATCH 125/357] Change default Parquet writer settings to match arrow-rs (except for compression & statistics) (#11558) * feat(11367): define the default session's writer props, both with and without the parquet feature enabled * feat(11367): map the defaults that already align between datafusion and arrow-rs * feat(11367): move datafusion to use extern parquet defaults for most writer settings * chore(11367): denote what are the remaining misaligned defaults, for further discussion * chore: fix doc links * chore: update the readme demonstrating the default config setting * refactor(11367): remove layer of indirection using the conditional parquet feature, instead rely upon regression testing --- datafusion/common/src/config.rs | 15 ++-- .../common/src/file_options/parquet_writer.rs | 72 ------------------- .../test_files/information_schema.slt | 18 ++--- docs/source/user-guide/configs.md | 10 +-- 4 files changed, 23 insertions(+), 92 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 3cbe14cb558eb..6e007ded03888 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -374,18 +374,21 @@ config_namespace! { /// (writing) Sets parquet writer version /// valid values are "1.0" and "2.0" - pub writer_version: String, default = "1.0".into() + pub writer_version: String, default = "1.0".to_string() /// (writing) Sets default parquet compression codec. /// Valid values are: uncompressed, snappy, gzip(level), /// lzo, brotli(level), lz4, zstd(level), and lz4_raw. /// These values are not case sensitive. If NULL, uses /// default parquet writer setting + /// + /// Note that this default setting is not the same as + /// the default parquet writer setting. pub compression: Option, default = Some("zstd(3)".into()) /// (writing) Sets if dictionary encoding is enabled. If NULL, uses /// default parquet writer setting - pub dictionary_enabled: Option, default = None + pub dictionary_enabled: Option, default = Some(true) /// (writing) Sets best effort maximum dictionary page size, in bytes pub dictionary_page_size_limit: usize, default = 1024 * 1024 @@ -398,21 +401,21 @@ config_namespace! { /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting - pub max_statistics_size: Option, default = None + pub max_statistics_size: Option, default = Some(4096) /// (writing) Target maximum number of rows in each row group (defaults to 1M /// rows). Writing larger row groups requires more memory to write, but /// can get better compression and be faster to read. 
- pub max_row_group_size: usize, default = 1024 * 1024 + pub max_row_group_size: usize, default = 1024 * 1024 /// (writing) Sets "created by" property pub created_by: String, default = concat!("datafusion version ", env!("CARGO_PKG_VERSION")).into() /// (writing) Sets column index truncate length - pub column_index_truncate_length: Option, default = None + pub column_index_truncate_length: Option, default = Some(64) /// (writing) Sets best effort maximum number of rows in data page - pub data_page_row_count_limit: usize, default = usize::MAX + pub data_page_row_count_limit: usize, default = 20_000 /// (writing) Sets default encoding for any column. /// Valid values are: plain, plain_dictionary, rle, diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index abe7db2009a25..e63a7e5ef78d8 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -644,56 +644,6 @@ mod tests { "datafusion's default is zstd" ); - // TODO: data_page_row_count_limit defaults do not match - // refer to https://github.com/apache/datafusion/issues/11367 - assert_eq!( - default_writer_props.data_page_row_count_limit(), - 20_000, - "extern parquet's default data_page_row_count_limit is 20_000" - ); - assert_eq!( - from_datafusion_defaults.data_page_row_count_limit(), - usize::MAX, - "datafusion's default is usize::MAX" - ); - - // TODO: column_index_truncate_length do not match - // refer to https://github.com/apache/datafusion/issues/11367 - assert_eq!( - default_writer_props.column_index_truncate_length(), - Some(64), - "extern parquet's default is 64" - ); - assert_eq!( - from_datafusion_defaults.column_index_truncate_length(), - None, - "datafusion's default is None" - ); - - // The next few examples are where datafusion's default is None. - // But once datafusion's TableParquetOptions are converted to a WriterProperties, - // then we get the extern parquet's defaults. - // - // In other words, we do not get indeterminate behavior in the output writer props. - // But this is only because we use the extern parquet's defaults when we leave - // the datafusion setting as None. - - // datafusion's `None` for Option => becomes parquet's true - // TODO: should this be changed? - // refer to https://github.com/apache/datafusion/issues/11367 - assert!( - default_writer_props.dictionary_enabled(&"default".into()), - "extern parquet's default is true" - ); - assert_eq!( - default_table_writer_opts.global.dictionary_enabled, None, - "datafusion's has no default" - ); - assert!( - from_datafusion_defaults.dictionary_enabled(&"default".into()), - "should see the extern parquet's default over-riding datafusion's None", - ); - // datafusion's `None` for Option => becomes parquet's EnabledStatistics::Page // TODO: should this be changed? // refer to https://github.com/apache/datafusion/issues/11367 @@ -712,35 +662,13 @@ mod tests { "should see the extern parquet's default over-riding datafusion's None", ); - // datafusion's `None` for Option => becomes parquet's 4096 - // TODO: should this be changed? 
- // refer to https://github.com/apache/datafusion/issues/11367 - assert_eq!( - default_writer_props.max_statistics_size(&"default".into()), - 4096, - "extern parquet's default is 4096" - ); - assert_eq!( - default_table_writer_opts.global.max_statistics_size, None, - "datafusion's has no default" - ); - assert_eq!( - default_writer_props.max_statistics_size(&"default".into()), - 4096, - "should see the extern parquet's default over-riding datafusion's None", - ); - // Confirm all other settings are equal. // First resolve the known discrepancies, (set as the same). // TODO: once we fix the above mis-matches, we should be able to remove this. let mut from_extern_parquet = session_config_from_writer_props(&default_writer_props); from_extern_parquet.global.compression = Some("zstd(3)".into()); - from_extern_parquet.global.data_page_row_count_limit = usize::MAX; - from_extern_parquet.global.column_index_truncate_length = None; - from_extern_parquet.global.dictionary_enabled = None; from_extern_parquet.global.statistics_enabled = None; - from_extern_parquet.global.max_statistics_size = None; // Expected: the remaining should match let same_created_by = default_table_writer_opts.global.created_by.clone(); // we expect these to be different diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 1c6ffd44b1efc..ddacf1cc6a79c 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -184,17 +184,17 @@ datafusion.execution.parquet.bloom_filter_fpp NULL datafusion.execution.parquet.bloom_filter_ndv NULL datafusion.execution.parquet.bloom_filter_on_read true datafusion.execution.parquet.bloom_filter_on_write false -datafusion.execution.parquet.column_index_truncate_length NULL +datafusion.execution.parquet.column_index_truncate_length 64 datafusion.execution.parquet.compression zstd(3) datafusion.execution.parquet.created_by datafusion -datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615 +datafusion.execution.parquet.data_page_row_count_limit 20000 datafusion.execution.parquet.data_pagesize_limit 1048576 -datafusion.execution.parquet.dictionary_enabled NULL +datafusion.execution.parquet.dictionary_enabled true datafusion.execution.parquet.dictionary_page_size_limit 1048576 datafusion.execution.parquet.enable_page_index true datafusion.execution.parquet.encoding NULL datafusion.execution.parquet.max_row_group_size 1048576 -datafusion.execution.parquet.max_statistics_size NULL +datafusion.execution.parquet.max_statistics_size 4096 datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 datafusion.execution.parquet.maximum_parallel_row_group_writers 1 datafusion.execution.parquet.metadata_size_hint NULL @@ -269,17 +269,17 @@ datafusion.execution.parquet.bloom_filter_fpp NULL (writing) Sets bloom filter f datafusion.execution.parquet.bloom_filter_ndv NULL (writing) Sets bloom filter number of distinct values. 
If NULL, uses default parquet writer setting datafusion.execution.parquet.bloom_filter_on_read true (writing) Use any available bloom filters when reading parquet files datafusion.execution.parquet.bloom_filter_on_write false (writing) Write bloom filters for all columns when creating parquet files -datafusion.execution.parquet.column_index_truncate_length NULL (writing) Sets column index truncate length -datafusion.execution.parquet.compression zstd(3) (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting +datafusion.execution.parquet.column_index_truncate_length 64 (writing) Sets column index truncate length +datafusion.execution.parquet.compression zstd(3) (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting Note that this default setting is not the same as the default parquet writer setting. datafusion.execution.parquet.created_by datafusion (writing) Sets "created by" property -datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615 (writing) Sets best effort maximum number of rows in data page +datafusion.execution.parquet.data_page_row_count_limit 20000 (writing) Sets best effort maximum number of rows in data page datafusion.execution.parquet.data_pagesize_limit 1048576 (writing) Sets best effort maximum size of data page in bytes -datafusion.execution.parquet.dictionary_enabled NULL (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting +datafusion.execution.parquet.dictionary_enabled true (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting datafusion.execution.parquet.dictionary_page_size_limit 1048576 (writing) Sets best effort maximum dictionary page size, in bytes datafusion.execution.parquet.enable_page_index true (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. datafusion.execution.parquet.encoding NULL (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting datafusion.execution.parquet.max_row_group_size 1048576 (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. -datafusion.execution.parquet.max_statistics_size NULL (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting +datafusion.execution.parquet.max_statistics_size 4096 (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. 
You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. datafusion.execution.parquet.maximum_parallel_row_group_writers 1 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 5e5de016e375e..e4b849cd28bb5 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -59,15 +59,15 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.data_pagesize_limit | 1048576 | (writing) Sets best effort maximum size of data page in bytes | | datafusion.execution.parquet.write_batch_size | 1024 | (writing) Sets write_batch_size in bytes | | datafusion.execution.parquet.writer_version | 1.0 | (writing) Sets parquet writer version valid values are "1.0" and "2.0" | -| datafusion.execution.parquet.compression | zstd(3) | (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.dictionary_enabled | NULL | (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.compression | zstd(3) | (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting Note that this default setting is not the same as the default parquet writer setting. | +| datafusion.execution.parquet.dictionary_enabled | true | (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | NULL | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.max_statistics_size | NULL | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | | datafusion.execution.parquet.created_by | datafusion version 40.0.0 | (writing) Sets "created by" property | -| datafusion.execution.parquet.column_index_truncate_length | NULL | (writing) Sets column index truncate length | -| datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | (writing) Sets best effort maximum number of rows in data page | +| datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | +| datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.bloom_filter_on_read | true | (writing) Use any available bloom filters when reading parquet files | | datafusion.execution.parquet.bloom_filter_on_write | false | (writing) Write bloom filters for all columns when creating parquet files | From 6d3521738b5b3cb6f02ed3c9266431b714647330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 24 Jul 2024 01:57:02 +0800 Subject: [PATCH 126/357] Push scalar functions into cross join (#11528) * UDF can be join condition * Fix test --- datafusion/optimizer/src/push_down_filter.rs | 6 ++--- datafusion/sqllogictest/test_files/joins.slt | 23 +++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index a22f2e83e2110..ad9be449d9ab7 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -261,8 +261,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::OuterReferenceColumn(_, _) - | Expr::Unnest(_) - | Expr::ScalarFunction(_) => { + | Expr::Unnest(_) => { is_evaluate = false; Ok(TreeNodeRecursion::Stop) } @@ -284,7 +283,8 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) - | Expr::InList { .. } => Ok(TreeNodeRecursion::Continue), + | Expr::InList { .. 
} + | Expr::ScalarFunction(_) => Ok(TreeNodeRecursion::Continue), Expr::Sort(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index b9897f81a107a..441ccb7d99d5b 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3532,7 +3532,7 @@ physical_plan 03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 04)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true -# Currently datafusion cannot pushdown filter conditions with scalar UDF into +# Currently datafusion can pushdown filter conditions with scalar UDF into # cross join. query TT EXPLAIN SELECT * @@ -3540,19 +3540,16 @@ FROM annotated_data as t1, annotated_data as t2 WHERE EXAMPLE(t1.a, t2.a) > 3 ---- logical_plan -01)Filter: example(CAST(t1.a AS Float64), CAST(t2.a AS Float64)) > Float64(3) -02)--CrossJoin: -03)----SubqueryAlias: t1 -04)------TableScan: annotated_data projection=[a0, a, b, c, d] -05)----SubqueryAlias: t2 -06)------TableScan: annotated_data projection=[a0, a, b, c, d] +01)Inner Join: Filter: example(CAST(t1.a AS Float64), CAST(t2.a AS Float64)) > Float64(3) +02)--SubqueryAlias: t1 +03)----TableScan: annotated_data projection=[a0, a, b, c, d] +04)--SubqueryAlias: t2 +05)----TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan -01)CoalesceBatchesExec: target_batch_size=2 -02)--FilterExec: example(CAST(a@1 AS Float64), CAST(a@6 AS Float64)) > 3 -03)----CrossJoinExec -04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true -05)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true +01)NestedLoopJoinExec: join_type=Inner, filter=example(CAST(a@0 AS Float64), CAST(a@1 AS Float64)) > 3 +02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true +03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +04)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true #### # Config teardown From 6d8bd2cb670ec003929871b619aadc3967457ac1 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:57:41 -0700 Subject: [PATCH 127/357] Implement physical plan serialization for csv COPY plans , add `as_any`, `Debug` to `FileFormatFactory` (#11588) * Implement physical plan serialization for COPY plans CsvLogicalExtensionCodec * fix check * optimize code * optimize code --- .../examples/custom_file_format.rs | 6 +- .../core/src/datasource/file_format/arrow.rs | 6 +- .../core/src/datasource/file_format/avro.rs | 11 ++ .../core/src/datasource/file_format/csv.rs | 15 +- .../core/src/datasource/file_format/json.rs | 12 ++ 
.../core/src/datasource/file_format/mod.rs | 16 +- .../src/datasource/file_format/parquet.rs | 11 ++ .../proto/src/logical_plan/file_formats.rs | 154 ++++++++++++++++-- datafusion/proto/src/logical_plan/mod.rs | 7 +- .../tests/cases/roundtrip_logical_plan.rs | 43 +++-- 10 files changed, 251 insertions(+), 30 deletions(-) diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index bdb702375c945..8612a1cc4430c 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -131,7 +131,7 @@ impl FileFormat for TSVFileFormat { } } -#[derive(Default)] +#[derive(Default, Debug)] /// Factory for creating TSV file formats /// /// This factory is a wrapper around the CSV file format factory @@ -166,6 +166,10 @@ impl FileFormatFactory for TSVFileFactory { fn default(&self) -> std::sync::Arc { todo!() } + + fn as_any(&self) -> &dyn Any { + self + } } impl GetExt for TSVFileFactory { diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 6bcbd43476821..8b6a8800119d4 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -66,7 +66,7 @@ const INITIAL_BUFFER_BYTES: usize = 1048576; /// If the buffered Arrow data exceeds this size, it is flushed to object store const BUFFER_FLUSH_BYTES: usize = 1024000; -#[derive(Default)] +#[derive(Default, Debug)] /// Factory struct used to create [ArrowFormat] pub struct ArrowFormatFactory; @@ -89,6 +89,10 @@ impl FileFormatFactory for ArrowFormatFactory { fn default(&self) -> Arc { Arc::new(ArrowFormat) } + + fn as_any(&self) -> &dyn Any { + self + } } impl GetExt for ArrowFormatFactory { diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs index f4f9adcba7ed8..5190bdbe153a2 100644 --- a/datafusion/core/src/datasource/file_format/avro.rs +++ b/datafusion/core/src/datasource/file_format/avro.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::collections::HashMap; +use std::fmt; use std::sync::Arc; use arrow::datatypes::Schema; @@ -64,6 +65,16 @@ impl FileFormatFactory for AvroFormatFactory { fn default(&self) -> Arc { Arc::new(AvroFormat) } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl fmt::Debug for AvroFormatFactory { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("AvroFormatFactory").finish() + } } impl GetExt for AvroFormatFactory { diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 958d2694aa04a..e1b6daac092db 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -58,7 +58,8 @@ use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore} #[derive(Default)] /// Factory struct used to create [CsvFormatFactory] pub struct CsvFormatFactory { - options: Option, + /// the options for csv file read + pub options: Option, } impl CsvFormatFactory { @@ -75,6 +76,14 @@ impl CsvFormatFactory { } } +impl fmt::Debug for CsvFormatFactory { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CsvFormatFactory") + .field("options", &self.options) + .finish() + } +} + impl FileFormatFactory for CsvFormatFactory { fn create( &self, @@ -103,6 +112,10 @@ impl FileFormatFactory for CsvFormatFactory { fn default(&self) -> Arc { 
Arc::new(CsvFormat::default()) } + + fn as_any(&self) -> &dyn Any { + self + } } impl GetExt for CsvFormatFactory { diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 007b084f504dd..9de9c3d7d8712 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -102,6 +102,10 @@ impl FileFormatFactory for JsonFormatFactory { fn default(&self) -> Arc { Arc::new(JsonFormat::default()) } + + fn as_any(&self) -> &dyn Any { + self + } } impl GetExt for JsonFormatFactory { @@ -111,6 +115,14 @@ impl GetExt for JsonFormatFactory { } } +impl fmt::Debug for JsonFormatFactory { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("JsonFormatFactory") + .field("options", &self.options) + .finish() + } +} + /// New line delimited JSON `FileFormat` implementation. #[derive(Debug, Default)] pub struct JsonFormat { diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 1aa93a106aff0..500f20af474f9 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -49,11 +49,11 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; use async_trait::async_trait; use file_compression_type::FileCompressionType; use object_store::{ObjectMeta, ObjectStore}; - +use std::fmt::Debug; /// Factory for creating [`FileFormat`] instances based on session and command level options /// /// Users can provide their own `FileFormatFactory` to support arbitrary file formats -pub trait FileFormatFactory: Sync + Send + GetExt { +pub trait FileFormatFactory: Sync + Send + GetExt + Debug { /// Initialize a [FileFormat] and configure based on session and command level options fn create( &self, @@ -63,6 +63,10 @@ pub trait FileFormatFactory: Sync + Send + GetExt { /// Initialize a [FileFormat] with all options set to default values fn default(&self) -> Arc; + + /// Returns the table source as [`Any`] so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; } /// This trait abstracts all the file format specific implementations @@ -138,6 +142,7 @@ pub trait FileFormat: Send + Sync + fmt::Debug { /// The former trait is a superset of the latter trait, which includes execution time /// relevant methods. [FileType] is only used in logical planning and only implements /// the subset of methods required during logical planning. 
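// Illustrative sketch, not part of the upstream diff: with `as_any` now required on
// `FileFormatFactory`, code that only holds an `Arc<dyn FileFormatFactory>` can recover
// the concrete factory and its options, mirroring what the CSV extension codec and the
// roundtrip test later in this patch do. The helper name `csv_options_of` is
// hypothetical, and the `use` items it would need (CsvFormatFactory, CsvOptions) are
// omitted here.
fn csv_options_of(factory: &Arc<dyn FileFormatFactory>) -> Option<CsvOptions> {
    factory
        .as_any()                              // &dyn Any for the concrete factory
        .downcast_ref::<CsvFormatFactory>()    // Some(&CsvFormatFactory) if it is CSV
        .and_then(|csv| csv.options.clone())   // the now-public `options` field
}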
+#[derive(Debug)] pub struct DefaultFileType { file_format_factory: Arc, } @@ -149,6 +154,11 @@ impl DefaultFileType { file_format_factory, } } + + /// get a reference to the inner [FileFormatFactory] struct + pub fn as_format_factory(&self) -> &Arc { + &self.file_format_factory + } } impl FileType for DefaultFileType { @@ -159,7 +169,7 @@ impl FileType for DefaultFileType { impl Display for DefaultFileType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.file_format_factory.default().fmt(f) + write!(f, "{:?}", self.file_format_factory) } } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index d4e77b911c9f1..3250b59fa1d13 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -140,6 +140,10 @@ impl FileFormatFactory for ParquetFormatFactory { fn default(&self) -> Arc { Arc::new(ParquetFormat::default()) } + + fn as_any(&self) -> &dyn Any { + self + } } impl GetExt for ParquetFormatFactory { @@ -149,6 +153,13 @@ impl GetExt for ParquetFormatFactory { } } +impl fmt::Debug for ParquetFormatFactory { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ParquetFormatFactory") + .field("ParquetFormatFactory", &self.options) + .finish() + } +} /// The Apache Parquet `FileFormat` implementation #[derive(Debug, Default)] pub struct ParquetFormat { diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index 09e36a650b9fa..2c4085b888692 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -18,19 +18,129 @@ use std::sync::Arc; use datafusion::{ + config::CsvOptions, datasource::file_format::{ arrow::ArrowFormatFactory, csv::CsvFormatFactory, json::JsonFormatFactory, parquet::ParquetFormatFactory, FileFormatFactory, }, prelude::SessionContext, }; -use datafusion_common::{not_impl_err, TableReference}; +use datafusion_common::{ + exec_err, not_impl_err, parsers::CompressionTypeVariant, DataFusionError, + TableReference, +}; +use prost::Message; + +use crate::protobuf::CsvOptions as CsvOptionsProto; use super::LogicalExtensionCodec; #[derive(Debug)] pub struct CsvLogicalExtensionCodec; +impl CsvOptionsProto { + fn from_factory(factory: &CsvFormatFactory) -> Self { + if let Some(options) = &factory.options { + CsvOptionsProto { + has_header: options.has_header.map_or(vec![], |v| vec![v as u8]), + delimiter: vec![options.delimiter], + quote: vec![options.quote], + escape: options.escape.map_or(vec![], |v| vec![v]), + double_quote: options.double_quote.map_or(vec![], |v| vec![v as u8]), + compression: options.compression as i32, + schema_infer_max_rec: options.schema_infer_max_rec as u64, + date_format: options.date_format.clone().unwrap_or_default(), + datetime_format: options.datetime_format.clone().unwrap_or_default(), + timestamp_format: options.timestamp_format.clone().unwrap_or_default(), + timestamp_tz_format: options + .timestamp_tz_format + .clone() + .unwrap_or_default(), + time_format: options.time_format.clone().unwrap_or_default(), + null_value: options.null_value.clone().unwrap_or_default(), + comment: options.comment.map_or(vec![], |v| vec![v]), + newlines_in_values: options + .newlines_in_values + .map_or(vec![], |v| vec![v as u8]), + } + } else { + CsvOptionsProto::default() + } + } +} + +impl From<&CsvOptionsProto> for CsvOptions { + fn from(proto: &CsvOptionsProto) -> 
Self { + CsvOptions { + has_header: if !proto.has_header.is_empty() { + Some(proto.has_header[0] != 0) + } else { + None + }, + delimiter: proto.delimiter.first().copied().unwrap_or(b','), + quote: proto.quote.first().copied().unwrap_or(b'"'), + escape: if !proto.escape.is_empty() { + Some(proto.escape[0]) + } else { + None + }, + double_quote: if !proto.double_quote.is_empty() { + Some(proto.double_quote[0] != 0) + } else { + None + }, + compression: match proto.compression { + 0 => CompressionTypeVariant::GZIP, + 1 => CompressionTypeVariant::BZIP2, + 2 => CompressionTypeVariant::XZ, + 3 => CompressionTypeVariant::ZSTD, + _ => CompressionTypeVariant::UNCOMPRESSED, + }, + schema_infer_max_rec: proto.schema_infer_max_rec as usize, + date_format: if proto.date_format.is_empty() { + None + } else { + Some(proto.date_format.clone()) + }, + datetime_format: if proto.datetime_format.is_empty() { + None + } else { + Some(proto.datetime_format.clone()) + }, + timestamp_format: if proto.timestamp_format.is_empty() { + None + } else { + Some(proto.timestamp_format.clone()) + }, + timestamp_tz_format: if proto.timestamp_tz_format.is_empty() { + None + } else { + Some(proto.timestamp_tz_format.clone()) + }, + time_format: if proto.time_format.is_empty() { + None + } else { + Some(proto.time_format.clone()) + }, + null_value: if proto.null_value.is_empty() { + None + } else { + Some(proto.null_value.clone()) + }, + comment: if !proto.comment.is_empty() { + Some(proto.comment[0]) + } else { + None + }, + newlines_in_values: if proto.newlines_in_values.is_empty() { + None + } else { + Some(proto.newlines_in_values[0] != 0) + }, + } + } +} + // TODO! This is a placeholder for now and needs to be implemented for real. impl LogicalExtensionCodec for CsvLogicalExtensionCodec { fn try_decode( @@ -73,17 +183,41 @@ impl LogicalExtensionCodec for CsvLogicalExtensionCodec { fn try_decode_file_format( &self, - __buf: &[u8], - __ctx: &SessionContext, + buf: &[u8], + _ctx: &SessionContext, ) -> datafusion_common::Result> { - Ok(Arc::new(CsvFormatFactory::new())) + let proto = CsvOptionsProto::decode(buf).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to decode CsvOptionsProto: {:?}", + e + )) + })?; + let options: CsvOptions = (&proto).into(); + Ok(Arc::new(CsvFormatFactory { + options: Some(options), + })) } fn try_encode_file_format( &self, - __buf: &[u8], - __node: Arc, + buf: &mut Vec, + node: Arc, ) -> datafusion_common::Result<()> { + let options = + if let Some(csv_factory) = node.as_any().downcast_ref::() { + csv_factory.options.clone().unwrap_or_default() + } else { + return exec_err!("{}", "Unsupported FileFormatFactory type".to_string()); + }; + + let proto = CsvOptionsProto::from_factory(&CsvFormatFactory { + options: Some(options), + }); + + proto.encode(buf).map_err(|e| { + DataFusionError::Execution(format!("Failed to encode CsvOptions: {:?}", e)) + })?; + Ok(()) } } @@ -141,7 +275,7 @@ impl LogicalExtensionCodec for JsonLogicalExtensionCodec { fn try_encode_file_format( &self, - __buf: &[u8], + __buf: &mut Vec, __node: Arc, ) -> datafusion_common::Result<()> { Ok(()) @@ -201,7 +335,7 @@ impl LogicalExtensionCodec for ParquetLogicalExtensionCodec { fn try_encode_file_format( &self, - __buf: &[u8], + __buf: &mut Vec, __node: Arc, ) -> datafusion_common::Result<()> { Ok(()) @@ -261,7 +395,7 @@ impl LogicalExtensionCodec for ArrowLogicalExtensionCodec { fn try_encode_file_format( &self, - __buf: &[u8], + __buf: &mut Vec, __node: Arc, ) -> datafusion_common::Result<()> { Ok(()) @@ -321,7 
+455,7 @@ impl LogicalExtensionCodec for AvroLogicalExtensionCodec { fn try_encode_file_format( &self, - __buf: &[u8], + __buf: &mut Vec, __node: Arc, ) -> datafusion_common::Result<()> { Ok(()) diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 2a963fb13ccf0..5427f34e8e071 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -131,7 +131,7 @@ pub trait LogicalExtensionCodec: Debug + Send + Sync { fn try_encode_file_format( &self, - _buf: &[u8], + _buf: &mut Vec, _node: Arc, ) -> Result<()> { Ok(()) @@ -1666,10 +1666,9 @@ impl AsLogicalPlan for LogicalPlanNode { input, extension_codec, )?; - - let buf = Vec::new(); + let mut buf = Vec::new(); extension_codec - .try_encode_file_format(&buf, file_type_to_format(file_type)?)?; + .try_encode_file_format(&mut buf, file_type_to_format(file_type)?)?; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::CopyTo(Box::new( diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index f6557c7b2d8fc..e17515086ecd9 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -15,12 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::collections::HashMap; -use std::fmt::{self, Debug, Formatter}; -use std::sync::Arc; -use std::vec; - use arrow::array::{ ArrayRef, FixedSizeListArray, Int32Builder, MapArray, MapBuilder, StringBuilder, }; @@ -30,11 +24,16 @@ use arrow::datatypes::{ DECIMAL256_MAX_PRECISION, }; use prost::Message; +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{self, Debug, Formatter}; +use std::sync::Arc; +use std::vec; use datafusion::datasource::file_format::arrow::ArrowFormatFactory; use datafusion::datasource::file_format::csv::CsvFormatFactory; -use datafusion::datasource::file_format::format_as_file_type; use datafusion::datasource::file_format::parquet::ParquetFormatFactory; +use datafusion::datasource::file_format::{format_as_file_type, DefaultFileType}; use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::TableProvider; use datafusion::execution::session_state::SessionStateBuilder; @@ -380,7 +379,9 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { parquet_format.global.dictionary_page_size_limit = 444; parquet_format.global.max_row_group_size = 555; - let file_type = format_as_file_type(Arc::new(ParquetFormatFactory::new())); + let file_type = format_as_file_type(Arc::new( + ParquetFormatFactory::new_with_options(parquet_format), + )); let plan = LogicalPlan::Copy(CopyTo { input: Arc::new(input), @@ -395,7 +396,6 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { let logical_round_trip = logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); - match logical_round_trip { LogicalPlan::Copy(copy_to) => { assert_eq!("test.parquet", copy_to.output_url); @@ -458,7 +458,9 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { csv_format.time_format = Some("HH:mm:ss".to_string()); csv_format.null_value = Some("NIL".to_string()); - let file_type = format_as_file_type(Arc::new(CsvFormatFactory::new())); + let file_type = format_as_file_type(Arc::new(CsvFormatFactory::new_with_options( + csv_format.clone(), + ))); let plan = 
LogicalPlan::Copy(CopyTo { input: Arc::new(input), @@ -479,6 +481,27 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { assert_eq!("test.csv", copy_to.output_url); assert_eq!("csv".to_string(), copy_to.file_type.get_ext()); assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); + + let file_type = copy_to + .file_type + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + + let format_factory = file_type.as_format_factory(); + let csv_factory = format_factory + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + let csv_config = csv_factory.options.as_ref().unwrap(); + assert_eq!(csv_format.delimiter, csv_config.delimiter); + assert_eq!(csv_format.date_format, csv_config.date_format); + assert_eq!(csv_format.datetime_format, csv_config.datetime_format); + assert_eq!(csv_format.timestamp_format, csv_config.timestamp_format); + assert_eq!(csv_format.time_format, csv_config.time_format); + assert_eq!(csv_format.null_value, csv_config.null_value) } _ => panic!(), } From fc8e7b90356b94af5f591240b8165bc4c8275a51 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Wed, 24 Jul 2024 04:52:45 +0800 Subject: [PATCH 128/357] Remove ArrayAgg Builtin in favor of UDF (#11611) * rm def Signed-off-by: jayzhan211 * rewrite test Signed-off-by: jayzhan211 * fix Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/core/tests/dataframe/mod.rs | 4 +- datafusion/core/tests/sql/aggregates.rs | 2 +- datafusion/expr/src/aggregate_function.rs | 16 +- .../expr/src/type_coercion/aggregates.rs | 7 +- datafusion/expr/src/udaf.rs | 2 +- .../functions-aggregate/src/array_agg.rs | 7 +- datafusion/functions-array/src/planner.rs | 2 +- .../physical-expr-common/src/aggregate/mod.rs | 5 +- .../physical-expr/src/aggregate/build_in.rs | 4 +- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 7 +- .../proto/src/logical_plan/from_proto.rs | 1 - datafusion/proto/src/logical_plan/to_proto.rs | 2 - .../sqllogictest/test_files/aggregate.slt | 16 +- .../sqllogictest/test_files/binary_view.slt | 2 +- .../sqllogictest/test_files/group_by.slt | 138 +++++++++--------- datafusion/sqllogictest/test_files/window.slt | 10 +- 18 files changed, 100 insertions(+), 130 deletions(-) diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index d68b80691917c..bc01ada1e04b8 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1389,7 +1389,7 @@ async fn unnest_with_redundant_columns() -> Result<()> { let expected = vec![ "Projection: shapes.shape_id [shape_id:UInt32]", " Unnest: lists[shape_id2] structs[] [shape_id:UInt32, shape_id2:UInt32;N]", - " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[ARRAY_AGG(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]", + " Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: \"item\", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]", " TableScan: shapes projection=[shape_id] [shape_id:UInt32]", ]; @@ -1973,7 +1973,7 @@ async fn test_array_agg() -> Result<()> { let expected = [ "+-------------------------------------+", - "| ARRAY_AGG(test.a) |", + "| array_agg(test.a) |", "+-------------------------------------+", "| [abcDEF, abc123, CBAdef, 123AbcDef] |", 
"+-------------------------------------+", diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 1f4f9e77d5dc5..1f10cb244e83c 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -35,7 +35,7 @@ async fn csv_query_array_agg_distinct() -> Result<()> { assert_eq!( *actual[0].schema(), Schema::new(vec![Field::new_list( - "ARRAY_AGG(DISTINCT aggregate_test_100.c2)", + "array_agg(DISTINCT aggregate_test_100.c2)", Field::new("item", DataType::UInt32, true), true ),]) diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index 39b3b4ed3b5a4..4037e3c5db9b9 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -17,13 +17,12 @@ //! Aggregate function module contains all built-in aggregate functions definitions -use std::sync::Arc; use std::{fmt, str::FromStr}; use crate::utils; use crate::{type_coercion::aggregates::*, Signature, Volatility}; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::DataType; use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use strum_macros::EnumIter; @@ -37,8 +36,6 @@ pub enum AggregateFunction { Min, /// Maximum Max, - /// Aggregation into an array - ArrayAgg, } impl AggregateFunction { @@ -47,7 +44,6 @@ impl AggregateFunction { match self { Min => "MIN", Max => "MAX", - ArrayAgg => "ARRAY_AGG", } } } @@ -65,7 +61,6 @@ impl FromStr for AggregateFunction { // general "max" => AggregateFunction::Max, "min" => AggregateFunction::Min, - "array_agg" => AggregateFunction::ArrayAgg, _ => { return plan_err!("There is no built-in function named {name}"); } @@ -80,7 +75,7 @@ impl AggregateFunction { pub fn return_type( &self, input_expr_types: &[DataType], - input_expr_nullable: &[bool], + _input_expr_nullable: &[bool], ) -> Result { // Note that this function *must* return the same type that the respective physical expression returns // or the execution panics. @@ -105,11 +100,6 @@ impl AggregateFunction { // The coerced_data_types is same with input_types. Ok(coerced_data_types[0].clone()) } - AggregateFunction::ArrayAgg => Ok(DataType::List(Arc::new(Field::new( - "item", - coerced_data_types[0].clone(), - input_expr_nullable[0], - )))), } } @@ -118,7 +108,6 @@ impl AggregateFunction { pub fn nullable(&self) -> Result { match self { AggregateFunction::Max | AggregateFunction::Min => Ok(true), - AggregateFunction::ArrayAgg => Ok(true), } } } @@ -128,7 +117,6 @@ impl AggregateFunction { pub fn signature(&self) -> Signature { // note: the physical expression must accept the type returned by this function or the execution panics. 
match self { - AggregateFunction::ArrayAgg => Signature::any(1, Volatility::Immutable), AggregateFunction::Min | AggregateFunction::Max => { let valid = STRINGS .iter() diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs index fbec6e2f8024d..a024401e18d5f 100644 --- a/datafusion/expr/src/type_coercion/aggregates.rs +++ b/datafusion/expr/src/type_coercion/aggregates.rs @@ -95,7 +95,6 @@ pub fn coerce_types( check_arg_count(agg_fun.name(), input_types, &signature.type_signature)?; match agg_fun { - AggregateFunction::ArrayAgg => Ok(input_types.to_vec()), AggregateFunction::Min | AggregateFunction::Max => { // min and max support the dictionary data type // unpack the dictionary to get the value @@ -360,11 +359,7 @@ mod tests { // test count, array_agg, approx_distinct, min, max. // the coerced types is same with input types - let funs = vec![ - AggregateFunction::ArrayAgg, - AggregateFunction::Min, - AggregateFunction::Max, - ]; + let funs = vec![AggregateFunction::Min, AggregateFunction::Max]; let input_types = vec![ vec![DataType::Int32], vec![DataType::Decimal128(10, 2)], diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 1657e034fbe2b..2851ca811e0c0 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -542,7 +542,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { pub enum ReversedUDAF { /// The expression is the same as the original expression, like SUM, COUNT Identical, - /// The expression does not support reverse calculation, like ArrayAgg + /// The expression does not support reverse calculation NotSupported, /// The expression is different from the original expression Reversed(Arc), diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 777a242aa27e3..96b39ae4121eb 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -50,14 +50,12 @@ make_udaf_expr_and_func!( /// ARRAY_AGG aggregate expression pub struct ArrayAgg { signature: Signature, - alias: Vec, } impl Default for ArrayAgg { fn default() -> Self { Self { signature: Signature::any(1, Volatility::Immutable), - alias: vec!["array_agg".to_string()], } } } @@ -67,13 +65,12 @@ impl AggregateUDFImpl for ArrayAgg { self } - // TODO: change name to lowercase fn name(&self) -> &str { - "ARRAY_AGG" + "array_agg" } fn aliases(&self) -> &[String] { - &self.alias + &[] } fn signature(&self) -> &Signature { diff --git a/datafusion/functions-array/src/planner.rs b/datafusion/functions-array/src/planner.rs index c63c2c83e66e8..3f779c9f111ed 100644 --- a/datafusion/functions-array/src/planner.rs +++ b/datafusion/functions-array/src/planner.rs @@ -172,7 +172,7 @@ impl ExprPlanner for FieldAccessPlanner { fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool { if let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def { - return udf.name() == "ARRAY_AGG"; + return udf.name() == "array_agg"; } false diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 05c7e1caed0e8..8c5f9f9e5a7e1 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -573,8 +573,9 @@ impl AggregateExpr for AggregateFunctionExpr { }) .collect::>(); let mut name = self.name().to_string(); - // TODO: Generalize order-by clause rewrite - if reverse_udf.name() == "ARRAY_AGG" { 
+ // If the function is changed, we need to reverse order_by clause as well + // i.e. First(a order by b asc null first) -> Last(a order by b desc null last) + if self.fun().name() == reverse_udf.name() { } else { replace_order_by_clause(&mut name); } diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index 27c1533d05525..bdc41ff0a9bc2 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -30,7 +30,7 @@ use std::sync::Arc; use arrow::datatypes::Schema; -use datafusion_common::{internal_err, Result}; +use datafusion_common::Result; use datafusion_expr::AggregateFunction; use crate::expressions::{self}; @@ -56,7 +56,6 @@ pub fn create_aggregate_expr( let data_type = input_phy_types[0].clone(); let input_phy_exprs = input_phy_exprs.to_vec(); Ok(match (fun, distinct) { - (AggregateFunction::ArrayAgg, _) => return internal_err!("not reachable"), (AggregateFunction::Min, _) => Arc::new(expressions::Min::new( Arc::clone(&input_phy_exprs[0]), name, @@ -123,7 +122,6 @@ mod tests { result_agg_phy_exprs.field().unwrap() ); } - _ => {} }; } } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 49d9f2dde67f0..e133abd46f43d 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -472,7 +472,7 @@ enum AggregateFunction { // AVG = 3; // COUNT = 4; // APPROX_DISTINCT = 5; - ARRAY_AGG = 6; + // ARRAY_AGG = 6; // VARIANCE = 7; // VARIANCE_POP = 8; // COVARIANCE = 9; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 25f6646d2a9af..c5ec67d72875f 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -534,7 +534,6 @@ impl serde::Serialize for AggregateFunction { let variant = match self { Self::Min => "MIN", Self::Max => "MAX", - Self::ArrayAgg => "ARRAY_AGG", }; serializer.serialize_str(variant) } @@ -548,7 +547,6 @@ impl<'de> serde::Deserialize<'de> for AggregateFunction { const FIELDS: &[&str] = &[ "MIN", "MAX", - "ARRAY_AGG", ]; struct GeneratedVisitor; @@ -591,7 +589,6 @@ impl<'de> serde::Deserialize<'de> for AggregateFunction { match value { "MIN" => Ok(AggregateFunction::Min), "MAX" => Ok(AggregateFunction::Max), - "ARRAY_AGG" => Ok(AggregateFunction::ArrayAgg), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index ba288fe3d1b87..98b70dc253511 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1941,12 +1941,11 @@ pub struct PartitionStats { #[repr(i32)] pub enum AggregateFunction { Min = 0, - Max = 1, /// SUM = 2; /// AVG = 3; /// COUNT = 4; /// APPROX_DISTINCT = 5; - /// + /// ARRAY_AGG = 6; /// VARIANCE = 7; /// VARIANCE_POP = 8; /// COVARIANCE = 9; @@ -1975,7 +1974,7 @@ pub enum AggregateFunction { /// REGR_SXY = 34; /// STRING_AGG = 35; /// NTH_VALUE_AGG = 36; - ArrayAgg = 6, + Max = 1, } impl AggregateFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -1986,7 +1985,6 @@ impl AggregateFunction { match self { AggregateFunction::Min => "MIN", AggregateFunction::Max => "MAX", - AggregateFunction::ArrayAgg => "ARRAY_AGG", } } /// Creates an enum from field names used in the ProtoBuf definition. 
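// Illustrative sketch, not part of the upstream diff: with the ArrayAgg builtin removed,
// `array_agg` is resolved through the UDAF that `datafusion-functions-aggregate`
// registers via `make_udaf_expr_and_func!` (see array_agg.rs above), which is why the
// sqllogictest plans below switch from `ARRAY_AGG(...)` to lowercase `array_agg(...)`.
// The import paths are assumed from that crate layout; `array_agg_of_column` is a
// hypothetical helper name.
use datafusion_expr::{col, Expr};
use datafusion_functions_aggregate::array_agg::array_agg;

fn array_agg_of_column(name: &str) -> Expr {
    // Builds the same aggregate the removed AggregateFunction::ArrayAgg variant produced,
    // now going through the user-defined aggregate function path.
    array_agg(col(name))
}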
@@ -1994,7 +1992,6 @@ impl AggregateFunction { match value { "MIN" => Some(Self::Min), "MAX" => Some(Self::Max), - "ARRAY_AGG" => Some(Self::ArrayAgg), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b6b556a8ed6b2..aea8e454a31c4 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -144,7 +144,6 @@ impl From for AggregateFunction { match agg_fun { protobuf::AggregateFunction::Min => Self::Min, protobuf::AggregateFunction::Max => Self::Max, - protobuf::AggregateFunction::ArrayAgg => Self::ArrayAgg, } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 9607b918eb895..c2441892e8a89 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -116,7 +116,6 @@ impl From<&AggregateFunction> for protobuf::AggregateFunction { match value { AggregateFunction::Min => Self::Min, AggregateFunction::Max => Self::Max, - AggregateFunction::ArrayAgg => Self::ArrayAgg, } } } @@ -386,7 +385,6 @@ pub fn serialize_expr( }) => match func_def { AggregateFunctionDefinition::BuiltIn(fun) => { let aggr_function = match fun { - AggregateFunction::ArrayAgg => protobuf::AggregateFunction::ArrayAgg, AggregateFunction::Min => protobuf::AggregateFunction::Min, AggregateFunction::Max => protobuf::AggregateFunction::Max, }; diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index bb5ce1150a58b..fa228d499d1fc 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -129,12 +129,12 @@ query TT explain select array_agg(c1 order by c2 desc, c3) from agg_order; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[ARRAY_AGG(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]] +01)Aggregate: groupBy=[[]], aggr=[[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]] 02)--TableScan: agg_order projection=[c1, c2, c3] physical_plan -01)AggregateExec: mode=Final, gby=[], aggr=[ARRAY_AGG(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] +01)AggregateExec: mode=Final, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] 02)--CoalescePartitionsExec -03)----AggregateExec: mode=Partial, gby=[], aggr=[ARRAY_AGG(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] +03)----AggregateExec: mode=Partial, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] 04)------SortExec: expr=[c2@1 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[true] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_agg_multi_order.csv]]}, projection=[c1, c2, c3], has_header=true @@ -231,8 +231,8 @@ explain with A as ( ) select array_length(array_agg(distinct a.foo)), sum(distinct 1) from A a group by a.id; ---- logical_plan -01)Projection: array_length(ARRAY_AGG(DISTINCT a.foo)), sum(DISTINCT Int64(1)) -02)--Aggregate: groupBy=[[a.id]], aggr=[[ARRAY_AGG(DISTINCT a.foo), sum(DISTINCT Int64(1))]] +01)Projection: array_length(array_agg(DISTINCT a.foo)), sum(DISTINCT Int64(1)) +02)--Aggregate: groupBy=[[a.id]], 
aggr=[[array_agg(DISTINCT a.foo), sum(DISTINCT Int64(1))]] 03)----SubqueryAlias: a 04)------SubqueryAlias: a 05)--------Union @@ -247,11 +247,11 @@ logical_plan 14)----------Projection: Int64(1) AS id, Int64(2) AS foo 15)------------EmptyRelation physical_plan -01)ProjectionExec: expr=[array_length(ARRAY_AGG(DISTINCT a.foo)@1) as array_length(ARRAY_AGG(DISTINCT a.foo)), sum(DISTINCT Int64(1))@2 as sum(DISTINCT Int64(1))] -02)--AggregateExec: mode=FinalPartitioned, gby=[id@0 as id], aggr=[ARRAY_AGG(DISTINCT a.foo), sum(DISTINCT Int64(1))] +01)ProjectionExec: expr=[array_length(array_agg(DISTINCT a.foo)@1) as array_length(array_agg(DISTINCT a.foo)), sum(DISTINCT Int64(1))@2 as sum(DISTINCT Int64(1))] +02)--AggregateExec: mode=FinalPartitioned, gby=[id@0 as id], aggr=[array_agg(DISTINCT a.foo), sum(DISTINCT Int64(1))] 03)----CoalesceBatchesExec: target_batch_size=8192 04)------RepartitionExec: partitioning=Hash([id@0], 4), input_partitions=5 -05)--------AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[ARRAY_AGG(DISTINCT a.foo), sum(DISTINCT Int64(1))], ordering_mode=Sorted +05)--------AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[array_agg(DISTINCT a.foo), sum(DISTINCT Int64(1))], ordering_mode=Sorted 06)----------UnionExec 07)------------ProjectionExec: expr=[1 as id, 2 as foo] 08)--------------PlaceholderRowExec diff --git a/datafusion/sqllogictest/test_files/binary_view.slt b/datafusion/sqllogictest/test_files/binary_view.slt index de0f0bea7ffb5..77ec77c5eccee 100644 --- a/datafusion/sqllogictest/test_files/binary_view.slt +++ b/datafusion/sqllogictest/test_files/binary_view.slt @@ -199,4 +199,4 @@ Raphael R false false true true NULL R NULL NULL NULL NULL statement ok -drop table test; \ No newline at end of file +drop table test; diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index b2be65a609e37..a3cc10e1eeb8e 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2289,10 +2289,10 @@ FROM annotated_data_infinite2 GROUP BY a, b; ---- logical_plan -01)Aggregate: groupBy=[[annotated_data_infinite2.a, annotated_data_infinite2.b]], aggr=[[ARRAY_AGG(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]]] +01)Aggregate: groupBy=[[annotated_data_infinite2.a, annotated_data_infinite2.b]], aggr=[[array_agg(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]]] 02)--TableScan: annotated_data_infinite2 projection=[a, b, d] physical_plan -01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[ARRAY_AGG(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]], ordering_mode=Sorted +01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[array_agg(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]], ordering_mode=Sorted 02)--PartialSortExec: expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST,d@2 ASC NULLS LAST], common_prefix_length=[2] 03)----StreamingTableExec: partition_sizes=1, projection=[a, b, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST] @@ -2459,12 +2459,12 @@ EXPLAIN SELECT country, (ARRAY_AGG(amount ORDER BY amount ASC)) AS amounts GROUP BY country ---- logical_plan -01)Projection: sales_global.country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts -02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY 
[sales_global.amount ASC NULLS LAST]]] +01)Projection: sales_global.country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts +02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] 03)----TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as amounts] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +01)ProjectionExec: expr=[country@0 as country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as amounts] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 03)----SortExec: expr=[amount@1 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2488,13 +2488,13 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, GROUP BY s.country ---- logical_plan -01)Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 -02)--Aggregate: groupBy=[[s.country]], aggr=[[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] +01)Projection: s.country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 +02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s 04)------TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)] +01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)] 03)----SortExec: expr=[amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2531,14 +2531,14 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, GROUP BY s.country ---- logical_plan -01)Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 -02)--Aggregate: groupBy=[[s.country]], aggr=[[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] +01)Projection: s.country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 +02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s 04)------Sort: sales_global.country ASC NULLS LAST 05)--------TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] -02)--AggregateExec: mode=Single, gby=[country@0 as country], 
aggr=[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted +01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted 03)----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2567,14 +2567,14 @@ EXPLAIN SELECT s.country, s.zip_code, ARRAY_AGG(s.amount ORDER BY s.amount DESC) GROUP BY s.country, s.zip_code ---- logical_plan -01)Projection: s.country, s.zip_code, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 -02)--Aggregate: groupBy=[[s.country, s.zip_code]], aggr=[[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] +01)Projection: s.country, s.zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 +02)--Aggregate: groupBy=[[s.country, s.zip_code]], aggr=[[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s 04)------Sort: sales_global.country ASC NULLS LAST 05)--------TableScan: sales_global projection=[zip_code, country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, sum(s.amount)@3 as sum1] -02)--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=PartiallySorted([0]) +01)ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, sum(s.amount)@3 as sum1] +02)--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=PartiallySorted([0]) 03)----SortExec: expr=[country@1 ASC NULLS LAST,amount@2 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2603,14 +2603,14 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.country DESC) AS amounts GROUP BY s.country ---- logical_plan -01)Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 -02)--Aggregate: groupBy=[[s.country]], aggr=[[ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] +01)Projection: s.country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 +02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s 04)------Sort: sales_global.country ASC NULLS LAST 05)--------TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted +01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST]@1 as 
amounts, sum(s.amount)@2 as sum1] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted 03)----SortExec: expr=[country@0 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2638,14 +2638,14 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.country DESC, s.amount D GROUP BY s.country ---- logical_plan -01)Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 -02)--Aggregate: groupBy=[[s.country]], aggr=[[ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] +01)Projection: s.country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 +02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s 04)------Sort: sales_global.country ASC NULLS LAST 05)--------TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted +01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted 03)----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2672,12 +2672,12 @@ EXPLAIN SELECT country, ARRAY_AGG(amount ORDER BY amount DESC) AS amounts, GROUP BY country ---- logical_plan -01)Projection: sales_global.country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 -02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] +01)Projection: sales_global.country, array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 +02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] 03)----TableScan: sales_global projection=[country, amount] physical_plan 
-01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@1 as amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] +01)ProjectionExec: expr=[country@0 as country, array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@1 as amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] 03)----SortExec: expr=[amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2703,12 +2703,12 @@ EXPLAIN SELECT country, ARRAY_AGG(amount ORDER BY amount ASC) AS amounts, GROUP BY country ---- logical_plan -01)Projection: sales_global.country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 -02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] +01)Projection: sales_global.country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 +02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] 03)----TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +01)ProjectionExec: expr=[country@0 as country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as amounts, 
first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 03)----SortExec: expr=[amount@1 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2735,12 +2735,12 @@ EXPLAIN SELECT country, FIRST_VALUE(amount ORDER BY amount ASC) AS fv1, GROUP BY country ---- logical_plan -01)Projection: sales_global.country, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts -02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] +01)Projection: sales_global.country, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts +02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] 03)----TableScan: sales_global projection=[country, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@2 as fv2, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@3 as amounts] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +01)ProjectionExec: expr=[country@0 as country, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@2 as fv2, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@3 as amounts] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 03)----SortExec: expr=[amount@1 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2765,12 +2765,12 @@ EXPLAIN SELECT country, SUM(amount ORDER BY ts DESC) AS sum1, GROUP BY country ---- logical_plan -01)Projection: 
sales_global.country, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS sum1, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts -02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[sum(CAST(sales_global.amount AS Float64)) ORDER BY [sales_global.ts DESC NULLS FIRST], ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] +01)Projection: sales_global.country, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS sum1, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS amounts +02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[sum(CAST(sales_global.amount AS Float64)) ORDER BY [sales_global.ts DESC NULLS FIRST], array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] 03)----TableScan: sales_global projection=[country, ts, amount] physical_plan -01)ProjectionExec: expr=[country@0 as country, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@1 as sum1, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as amounts] -02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +01)ProjectionExec: expr=[country@0 as country, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@1 as sum1, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as amounts] +02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 03)----SortExec: expr=[amount@2 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -3036,14 +3036,14 @@ EXPLAIN SELECT ARRAY_AGG(amount ORDER BY ts ASC) AS array_agg1 FROM sales_global ---- logical_plan -01)Projection: ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST] AS array_agg1 -02)--Aggregate: groupBy=[[]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]]] +01)Projection: array_agg(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST] AS array_agg1 +02)--Aggregate: groupBy=[[]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]]] 03)----TableScan: sales_global projection=[ts, amount] physical_plan -01)ProjectionExec: expr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]@0 as array_agg1] -02)--AggregateExec: mode=Final, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]] +01)ProjectionExec: expr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]@0 as array_agg1] +02)--AggregateExec: mode=Final, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]] +04)------AggregateExec: mode=Partial, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST]] 05)--------SortExec: expr=[ts@0 ASC NULLS LAST], preserve_partitioning=[true] 06)----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 07)------------MemoryExec: partitions=1, partition_sizes=[1] @@ -3060,14 +3060,14 @@ 
EXPLAIN SELECT ARRAY_AGG(amount ORDER BY ts DESC) AS array_agg1 FROM sales_global ---- logical_plan -01)Projection: ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS array_agg1 -02)--Aggregate: groupBy=[[]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]]] +01)Projection: array_agg(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS array_agg1 +02)--Aggregate: groupBy=[[]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]]] 03)----TableScan: sales_global projection=[ts, amount] physical_plan -01)ProjectionExec: expr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@0 as array_agg1] -02)--AggregateExec: mode=Final, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] +01)ProjectionExec: expr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@0 as array_agg1] +02)--AggregateExec: mode=Final, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] +04)------AggregateExec: mode=Partial, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] 05)--------SortExec: expr=[ts@0 DESC], preserve_partitioning=[true] 06)----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 07)------------MemoryExec: partitions=1, partition_sizes=[1] @@ -3084,14 +3084,14 @@ EXPLAIN SELECT ARRAY_AGG(amount ORDER BY amount ASC) AS array_agg1 FROM sales_global ---- logical_plan -01)Projection: ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS array_agg1 -02)--Aggregate: groupBy=[[]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] +01)Projection: array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS array_agg1 +02)--Aggregate: groupBy=[[]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] 03)----TableScan: sales_global projection=[amount] physical_plan -01)ProjectionExec: expr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@0 as array_agg1] -02)--AggregateExec: mode=Final, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +01)ProjectionExec: expr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@0 as array_agg1] +02)--AggregateExec: mode=Final, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +04)------AggregateExec: mode=Partial, gby=[], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 05)--------SortExec: expr=[amount@0 ASC NULLS LAST], preserve_partitioning=[true] 06)----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 07)------------MemoryExec: partitions=1, partition_sizes=[1] @@ -3111,17 +3111,17 @@ EXPLAIN SELECT country, ARRAY_AGG(amount ORDER BY amount ASC) AS array_agg1 ---- logical_plan 01)Sort: sales_global.country ASC NULLS LAST -02)--Projection: sales_global.country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS array_agg1 -03)----Aggregate: 
groupBy=[[sales_global.country]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] +02)--Projection: sales_global.country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS array_agg1 +03)----Aggregate: groupBy=[[sales_global.country]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]]] 04)------TableScan: sales_global projection=[country, amount] physical_plan 01)SortPreservingMergeExec: [country@0 ASC NULLS LAST] 02)--SortExec: expr=[country@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[country@0 as country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as array_agg1] -04)------AggregateExec: mode=FinalPartitioned, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +03)----ProjectionExec: expr=[country@0 as country, array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@1 as array_agg1] +04)------AggregateExec: mode=FinalPartitioned, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 05)--------CoalesceBatchesExec: target_batch_size=4 06)----------RepartitionExec: partitioning=Hash([country@0], 8), input_partitions=8 -07)------------AggregateExec: mode=Partial, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] +07)------------AggregateExec: mode=Partial, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]] 08)--------------SortExec: expr=[amount@1 ASC NULLS LAST], preserve_partitioning=[true] 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 10)------------------MemoryExec: partitions=1, partition_sizes=[1] @@ -3147,17 +3147,17 @@ EXPLAIN SELECT country, ARRAY_AGG(amount ORDER BY amount DESC) AS amounts, ---- logical_plan 01)Sort: sales_global.country ASC NULLS LAST -02)--Projection: sales_global.country, ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 -03)----Aggregate: groupBy=[[sales_global.country]], aggr=[[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] +02)--Projection: sales_global.country, array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST] AS fv2 +03)----Aggregate: groupBy=[[sales_global.country]], aggr=[[array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]]] 04)------TableScan: sales_global projection=[country, amount] physical_plan 01)SortPreservingMergeExec: [country@0 ASC NULLS LAST] 02)--SortExec: expr=[country@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[country@0 as country, 
ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@1 as amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] -04)------AggregateExec: mode=FinalPartitioned, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] +03)----ProjectionExec: expr=[country@0 as country, array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@1 as amounts, first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST]@2 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]@3 as fv2] +04)------AggregateExec: mode=FinalPartitioned, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], first_value(sales_global.amount) ORDER BY [sales_global.amount ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] 05)--------CoalesceBatchesExec: target_batch_size=4 06)----------RepartitionExec: partitioning=Hash([country@0], 8), input_partitions=8 -07)------------AggregateExec: mode=Partial, gby=[country@0 as country], aggr=[ARRAY_AGG(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] +07)------------AggregateExec: mode=Partial, gby=[country@0 as country], aggr=[array_agg(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.amount DESC NULLS FIRST]] 08)--------------SortExec: expr=[amount@1 DESC], preserve_partitioning=[true] 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 10)------------------MemoryExec: partitions=1, partition_sizes=[1] @@ -4971,10 +4971,10 @@ ORDER BY a, b; ---- logical_plan 01)Sort: multiple_ordered_table.a ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST -02)--Aggregate: groupBy=[[multiple_ordered_table.a, multiple_ordered_table.b]], aggr=[[ARRAY_AGG(multiple_ordered_table.c) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST]]] +02)--Aggregate: groupBy=[[multiple_ordered_table.a, multiple_ordered_table.b]], aggr=[[array_agg(multiple_ordered_table.c) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST]]] 03)----TableScan: multiple_ordered_table projection=[a, b, c] physical_plan -01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[ARRAY_AGG(multiple_ordered_table.c) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST]], ordering_mode=Sorted +01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[array_agg(multiple_ordered_table.c) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST]], ordering_mode=Sorted 02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_orderings=[[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], has_header=true query II? 
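The expected plans above change only because the aggregate is now displayed under its lowercase UDAF name, `array_agg`, rather than `ARRAY_AGG`. A minimal sketch of reproducing such an EXPLAIN from Rust; the small VALUES-backed table is an assumption for illustration (not part of the patch) and the snippet needs a tokio runtime plus the default datafusion features:

    use datafusion::error::Result;
    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> Result<()> {
        let ctx = SessionContext::new();
        // Stand-in table; VALUES names its columns column1/column2.
        ctx.sql("CREATE TABLE sales AS VALUES ('FRA', 50.0), ('FRA', 200.0), ('TUR', 75.0)")
            .await?
            .collect()
            .await?;
        // The rendered plan is expected to contain `array_agg(...)` in lowercase.
        ctx.sql(
            "EXPLAIN SELECT column1, array_agg(column2 ORDER BY column2 DESC) \
             FROM sales GROUP BY column1",
        )
        .await?
        .show()
        .await?;
        Ok(())
    }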
diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 37214e11eae80..e9d417c93a575 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2040,16 +2040,16 @@ query TT EXPLAIN SELECT ARRAY_AGG(c13) as array_agg1 FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 1) ---- logical_plan -01)Projection: ARRAY_AGG(aggregate_test_100.c13) AS array_agg1 -02)--Aggregate: groupBy=[[]], aggr=[[ARRAY_AGG(aggregate_test_100.c13)]] +01)Projection: array_agg(aggregate_test_100.c13) AS array_agg1 +02)--Aggregate: groupBy=[[]], aggr=[[array_agg(aggregate_test_100.c13)]] 03)----Limit: skip=0, fetch=1 04)------Sort: aggregate_test_100.c13 ASC NULLS LAST, fetch=1 05)--------TableScan: aggregate_test_100 projection=[c13] physical_plan -01)ProjectionExec: expr=[ARRAY_AGG(aggregate_test_100.c13)@0 as array_agg1] -02)--AggregateExec: mode=Final, gby=[], aggr=[ARRAY_AGG(aggregate_test_100.c13)] +01)ProjectionExec: expr=[array_agg(aggregate_test_100.c13)@0 as array_agg1] +02)--AggregateExec: mode=Final, gby=[], aggr=[array_agg(aggregate_test_100.c13)] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[ARRAY_AGG(aggregate_test_100.c13)] +04)------AggregateExec: mode=Partial, gby=[], aggr=[array_agg(aggregate_test_100.c13)] 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------GlobalLimitExec: skip=0, fetch=1 07)------------SortExec: TopK(fetch=1), expr=[c13@0 ASC NULLS LAST], preserve_partitioning=[false] From 5c37d008dc075aac431b13ebf20e409cfe94079b Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 24 Jul 2024 05:55:47 +0800 Subject: [PATCH 129/357] refactor: simplify `DFSchema::field_with_unqualified_name` (#11619) --- datafusion/common/src/dfschema.rs | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index eef0a03df700b..f0eecd2ffeb19 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -521,34 +521,8 @@ impl DFSchema { /// Find the field with the given name pub fn field_with_unqualified_name(&self, name: &str) -> Result<&Field> { - let matches = self.qualified_fields_with_unqualified_name(name); - match matches.len() { - 0 => Err(unqualified_field_not_found(name, self)), - 1 => Ok(matches[0].1), - _ => { - // When `matches` size > 1, it doesn't necessarily mean an `ambiguous name` problem. - // Because name may generate from Alias/... . It means that it don't own qualifier. - // For example: - // Join on id = b.id - // Project a.id as id TableScan b id - // In this case, there isn't `ambiguous name` problem. When `matches` just contains - // one field without qualifier, we should return it. 
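The comments being removed here describe the disambiguation rule (a single unqualified match wins even when qualified duplicates exist) that the simplified implementation now delegates to `qualified_field_with_unqualified_name`. A hedged caller-side sketch of the public behavior being preserved; the schema contents and the `amount` field name are assumed for illustration:

    use datafusion_common::{DFSchema, Result};

    fn resolve_amount(schema: &DFSchema) -> Result<()> {
        // Returns the single matching field, or an error when the name is
        // missing or genuinely ambiguous across qualified fields.
        let field = schema.field_with_unqualified_name("amount")?;
        println!("`amount` resolves to type {:?}", field.data_type());
        Ok(())
    }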
- let fields_without_qualifier = matches - .iter() - .filter(|(q, _)| q.is_none()) - .collect::>(); - if fields_without_qualifier.len() == 1 { - Ok(fields_without_qualifier[0].1) - } else { - _schema_err!(SchemaError::AmbiguousReference { - field: Column { - relation: None, - name: name.to_string(), - }, - }) - } - } - } + self.qualified_field_with_unqualified_name(name) + .map(|(_, field)| field) } /// Find the field with the given qualified name From c8ef54537c9e180eac10e13c1327f81e5e005394 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 24 Jul 2024 09:47:25 +0800 Subject: [PATCH 130/357] fix: panic and incorrect results in `LogFunc::output_ordering()` (#11571) * fix: panic and incorrect results in `LogFunc::output_ordering()` * fix for nulls_first --- datafusion/functions/src/math/log.rs | 123 ++++++++++++++++++- datafusion/sqllogictest/test_files/order.slt | 31 +++-- 2 files changed, 138 insertions(+), 16 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index ea424c14749e8..0e181aa612503 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -82,10 +82,16 @@ impl ScalarUDFImpl for LogFunc { } fn output_ordering(&self, input: &[ExprProperties]) -> Result { - match (input[0].sort_properties, input[1].sort_properties) { - (first @ SortProperties::Ordered(value), SortProperties::Ordered(base)) - if !value.descending && base.descending - || value.descending && !base.descending => + let (base_sort_properties, num_sort_properties) = if input.len() == 1 { + // log(x) defaults to log(10, x) + (SortProperties::Singleton, input[0].sort_properties) + } else { + (input[0].sort_properties, input[1].sort_properties) + }; + match (num_sort_properties, base_sort_properties) { + (first @ SortProperties::Ordered(num), SortProperties::Ordered(base)) + if num.descending != base.descending + && num.nulls_first == base.nulls_first => { Ok(first) } @@ -230,6 +236,7 @@ mod tests { use super::*; + use arrow::compute::SortOptions; use datafusion_common::cast::{as_float32_array, as_float64_array}; use datafusion_common::DFSchema; use datafusion_expr::execution_props::ExecutionProps; @@ -334,4 +341,112 @@ mod tests { assert_eq!(args[0], lit(2)); assert_eq!(args[1], lit(3)); } + + #[test] + fn test_log_output_ordering() { + // [Unordered, Ascending, Descending, Literal] + let orders = vec![ + ExprProperties::new_unknown(), + ExprProperties::new_unknown().with_order(SortProperties::Ordered( + SortOptions { + descending: false, + nulls_first: true, + }, + )), + ExprProperties::new_unknown().with_order(SortProperties::Ordered( + SortOptions { + descending: true, + nulls_first: true, + }, + )), + ExprProperties::new_unknown().with_order(SortProperties::Singleton), + ]; + + let log = LogFunc::new(); + + // Test log(num) + for order in orders.iter().cloned() { + let result = log.output_ordering(&[order.clone()]).unwrap(); + assert_eq!(result, order.sort_properties); + } + + // Test log(base, num), where `nulls_first` is the same + let mut results = Vec::with_capacity(orders.len() * orders.len()); + for base_order in orders.iter() { + for num_order in orders.iter().cloned() { + let result = log + .output_ordering(&[base_order.clone(), num_order]) + .unwrap(); + results.push(result); + } + } + let expected = vec![ + // base: Unordered + SortProperties::Unordered, + SortProperties::Unordered, + SortProperties::Unordered, + SortProperties::Unordered, + // base: Ascending, num: Unordered + SortProperties::Unordered, + // base: 
Ascending, num: Ascending + SortProperties::Unordered, + // base: Ascending, num: Descending + SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + // base: Ascending, num: Literal + SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + // base: Descending, num: Unordered + SortProperties::Unordered, + // base: Descending, num: Ascending + SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: true, + }), + // base: Descending, num: Descending + SortProperties::Unordered, + // base: Descending, num: Literal + SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: true, + }), + // base: Literal, num: Unordered + SortProperties::Unordered, + // base: Literal, num: Ascending + SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: true, + }), + // base: Literal, num: Descending + SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + // base: Literal, num: Literal + SortProperties::Singleton, + ]; + assert_eq!(results, expected); + + // Test with different `nulls_first` + let base_order = ExprProperties::new_unknown().with_order( + SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + ); + let num_order = ExprProperties::new_unknown().with_order( + SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + ); + assert_eq!( + log.output_ordering(&[base_order, num_order]).unwrap(), + SortProperties::Unordered + ); + } } diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 1aeaf9b76d48a..d0a6d6adc1079 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -326,6 +326,13 @@ select column1 + column2 from foo group by column1, column2 ORDER BY column2 des 7 3 +# Test issue: https://github.com/apache/datafusion/issues/11549 +query I +select column1 from foo order by log(column2); +---- +1 +3 +5 # Cleanup statement ok @@ -512,7 +519,7 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( ) STORED AS CSV WITH ORDER(c11) -WITH ORDER(c12 DESC) +WITH ORDER(c12 DESC NULLS LAST) LOCATION '../../testing/data/csv/aggregate_test_100.csv' OPTIONS ('format.has_header' 'true'); @@ -547,34 +554,34 @@ physical_plan 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c11], output_ordering=[c11@0 ASC NULLS LAST], has_header=true query TT - EXPLAIN SELECT LOG(c11, c12) as log_c11_base_c12 + EXPLAIN SELECT LOG(c12, c11) as log_c11_base_c12 FROM aggregate_test_100 ORDER BY log_c11_base_c12; ---- logical_plan 01)Sort: log_c11_base_c12 ASC NULLS LAST -02)--Projection: log(CAST(aggregate_test_100.c11 AS Float64), aggregate_test_100.c12) AS log_c11_base_c12 +02)--Projection: log(aggregate_test_100.c12, CAST(aggregate_test_100.c11 AS Float64)) AS log_c11_base_c12 03)----TableScan: aggregate_test_100 projection=[c11, c12] physical_plan 01)SortPreservingMergeExec: [log_c11_base_c12@0 ASC NULLS LAST] -02)--ProjectionExec: expr=[log(CAST(c11@0 AS Float64), c12@1) as log_c11_base_c12] +02)--ProjectionExec: expr=[log(c12@1, CAST(c11@0 AS Float64)) as log_c11_base_c12] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c11, c12], output_orderings=[[c11@0 ASC NULLS LAST], [c12@1 DESC]], has_header=true 
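The expectations above encode the rule the fix establishes: with two ordered arguments, `log(base, num)` keeps a known sort order only when `base` and `num` run in opposite directions with matching null placement, while one-argument `log(num)` behaves like `log(10, num)` and simply follows `num`. A hedged end-to-end sketch of the single-argument case from the new order.slt test; the `foo` table with numeric `column1`/`column2` is assumed to already exist in the context:

    use datafusion::error::Result;
    use datafusion::prelude::*;

    async fn order_by_log(ctx: &SessionContext) -> Result<()> {
        // Must neither panic nor mis-order rows now that output_ordering()
        // also handles the one-argument form of `log`.
        ctx.sql("SELECT column1 FROM foo ORDER BY log(column2)")
            .await?
            .show()
            .await?;
        Ok(())
    }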
+04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c11, c12], output_orderings=[[c11@0 ASC NULLS LAST], [c12@1 DESC NULLS LAST]], has_header=true query TT -EXPLAIN SELECT LOG(c12, c11) as log_c12_base_c11 +EXPLAIN SELECT LOG(c11, c12) as log_c12_base_c11 FROM aggregate_test_100 -ORDER BY log_c12_base_c11 DESC; +ORDER BY log_c12_base_c11 DESC NULLS LAST; ---- logical_plan -01)Sort: log_c12_base_c11 DESC NULLS FIRST -02)--Projection: log(aggregate_test_100.c12, CAST(aggregate_test_100.c11 AS Float64)) AS log_c12_base_c11 +01)Sort: log_c12_base_c11 DESC NULLS LAST +02)--Projection: log(CAST(aggregate_test_100.c11 AS Float64), aggregate_test_100.c12) AS log_c12_base_c11 03)----TableScan: aggregate_test_100 projection=[c11, c12] physical_plan -01)SortPreservingMergeExec: [log_c12_base_c11@0 DESC] -02)--ProjectionExec: expr=[log(c12@1, CAST(c11@0 AS Float64)) as log_c12_base_c11] +01)SortPreservingMergeExec: [log_c12_base_c11@0 DESC NULLS LAST] +02)--ProjectionExec: expr=[log(CAST(c11@0 AS Float64), c12@1) as log_c12_base_c11] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c11, c12], output_orderings=[[c11@0 ASC NULLS LAST], [c12@1 DESC]], has_header=true +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c11, c12], output_orderings=[[c11@0 ASC NULLS LAST], [c12@1 DESC NULLS LAST]], has_header=true statement ok drop table aggregate_test_100; From 72c6491d25fe253b8757028be77e1e6f5cd74c71 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 24 Jul 2024 08:36:49 -0400 Subject: [PATCH 131/357] Minor: Use upstream concat_batches (#11615) --- .../physical-plan/src/coalesce_batches.rs | 38 ++++--------------- .../physical-plan/src/joins/cross_join.rs | 9 ++--- .../src/joins/nested_loop_join.rs | 6 +-- 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index b9bdfcdee712c..8cb25827ff8fb 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -18,25 +18,23 @@ //! CoalesceBatchesExec combines small batches into larger batches for more efficient use of //! vectorized processing by upstream operators. -use std::any::Any; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; - use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; +use arrow::compute::concat_batches; +use std::any::Any; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; use arrow::record_batch::RecordBatch; use datafusion_common::Result; use datafusion_execution::TaskContext; use futures::stream::{Stream, StreamExt}; -use log::trace; /// CoalesceBatchesExec combines small batches into larger batches for more efficient use of /// vectorized processing by upstream operators. 
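The local `concat_batches` helper removed by this commit was a thin wrapper (a trace log plus a call into the arrow kernel), so the operators now call `arrow::compute::concat_batches` directly; the upstream function takes only the schema and the batches, with no separately tracked row count. A small standalone sketch of that API, with an invented schema and values for illustration:

    use std::sync::Arc;

    use arrow::array::{ArrayRef, Int32Array};
    use arrow::compute::concat_batches;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::error::Result as ArrowResult;
    use arrow::record_batch::RecordBatch;

    fn combine() -> ArrowResult<RecordBatch> {
        let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int32, false)]));
        let a = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef],
        )?;
        let b = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(Int32Array::from(vec![3])) as ArrayRef],
        )?;
        // One output batch containing the rows of `a` followed by those of `b`.
        concat_batches(&schema, &[a, b])
    }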
@@ -229,11 +227,7 @@ impl CoalesceBatchesStream { // check to see if we have enough batches yet if self.buffered_rows >= self.target_batch_size { // combine the batches and return - let batch = concat_batches( - &self.schema, - &self.buffer, - self.buffered_rows, - )?; + let batch = concat_batches(&self.schema, &self.buffer)?; // reset buffer state self.buffer.clear(); self.buffered_rows = 0; @@ -250,11 +244,7 @@ impl CoalesceBatchesStream { return Poll::Ready(None); } else { // combine the batches and return - let batch = concat_batches( - &self.schema, - &self.buffer, - self.buffered_rows, - )?; + let batch = concat_batches(&self.schema, &self.buffer)?; // reset buffer state self.buffer.clear(); self.buffered_rows = 0; @@ -276,20 +266,6 @@ impl RecordBatchStream for CoalesceBatchesStream { } } -/// Concatenates an array of `RecordBatch` into one batch -pub fn concat_batches( - schema: &SchemaRef, - batches: &[RecordBatch], - row_count: usize, -) -> ArrowResult { - trace!( - "Combined {} batches containing {} rows", - batches.len(), - row_count - ); - arrow::compute::concat_batches(schema, batches) -} - #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 8304ddc7331a0..b1482a9699d56 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -18,13 +18,10 @@ //! Defines the cross join plan for loading the left side of the cross join //! and producing batches in parallel for the right partitions -use std::{any::Any, sync::Arc, task::Poll}; - use super::utils::{ adjust_right_output_partitioning, BuildProbeJoinMetrics, OnceAsync, OnceFut, StatefulStreamResult, }; -use crate::coalesce_batches::concat_batches; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ @@ -33,6 +30,8 @@ use crate::{ ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; +use arrow::compute::concat_batches; +use std::{any::Any, sync::Arc, task::Poll}; use arrow::datatypes::{Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; @@ -155,7 +154,7 @@ async fn load_left_input( let stream = merge.execute(0, context)?; // Load all batches and count the rows - let (batches, num_rows, _, reservation) = stream + let (batches, _num_rows, _, reservation) = stream .try_fold( (Vec::new(), 0usize, metrics, reservation), |mut acc, batch| async { @@ -175,7 +174,7 @@ async fn load_left_input( ) .await?; - let merged_batch = concat_batches(&left_schema, &batches, num_rows)?; + let merged_batch = concat_batches(&left_schema, &batches)?; Ok((merged_batch, reservation)) } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index f8ca389808500..eac135bfd0fe3 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -26,7 +26,6 @@ use std::sync::Arc; use std::task::Poll; use super::utils::{asymmetric_join_output_partitioning, need_produce_result_in_final}; -use crate::coalesce_batches::concat_batches; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_from_indices, @@ -44,6 +43,7 @@ use crate::{ use arrow::array::{ BooleanBufferBuilder, UInt32Array, UInt32Builder, UInt64Array, UInt64Builder, }; +use 
arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; @@ -364,7 +364,7 @@ async fn collect_left_input( let stream = merge.execute(0, context)?; // Load all batches and count the rows - let (batches, num_rows, metrics, mut reservation) = stream + let (batches, _num_rows, metrics, mut reservation) = stream .try_fold( (Vec::new(), 0usize, join_metrics, reservation), |mut acc, batch| async { @@ -384,7 +384,7 @@ async fn collect_left_input( ) .await?; - let merged_batch = concat_batches(&schema, &batches, num_rows)?; + let merged_batch = concat_batches(&schema, &batches)?; // Reserve memory for visited_left_side bitmap if required by join type let visited_left_side = if with_visited_left_side { From 1e06b91d598782f8f732b104fe4c46468c4e3136 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 24 Jul 2024 21:09:33 +0800 Subject: [PATCH 132/357] Rename `functions-array` to `functions-nested` (#11602) * rename create to function-nested * rename array_expressions to nested_expression * rename doc and workflow * cargo fmt * update lock * Update readme * rename the missing parts * rename the planner * add backward compatibility --- .github/workflows/rust.yml | 4 ++-- Cargo.toml | 4 ++-- README.md | 2 +- datafusion-cli/Cargo.lock | 4 ++-- datafusion/core/Cargo.toml | 10 ++++---- datafusion/core/benches/map_query_sql.rs | 2 +- .../src/execution/session_state_defaults.rs | 23 ++++++++++--------- datafusion/core/src/lib.rs | 15 ++++++++---- datafusion/core/src/prelude.rs | 4 ++-- .../tests/dataframe/dataframe_functions.rs | 2 +- datafusion/core/tests/expr_api/mod.rs | 2 +- .../user_defined_scalar_functions.rs | 2 +- datafusion/expr/src/expr_rewriter/mod.rs | 2 +- .../Cargo.toml | 6 ++--- .../README.md | 4 ++-- .../benches/array_expression.rs | 2 +- .../benches/map.rs | 6 ++--- .../src/array_has.rs | 0 .../src/cardinality.rs | 0 .../src/concat.rs | 0 .../src/dimension.rs | 0 .../src/empty.rs | 0 .../src/except.rs | 0 .../src/expr_ext.rs | 4 ++-- .../src/extract.rs | 0 .../src/flatten.rs | 0 .../src/length.rs | 0 .../src/lib.rs | 14 +++++------ .../src/macros.rs | 0 .../src/make_array.rs | 0 .../src/map.rs | 0 .../src/planner.rs | 6 ++--- .../src/position.rs | 0 .../src/range.rs | 0 .../src/remove.rs | 0 .../src/repeat.rs | 0 .../src/replace.rs | 0 .../src/resize.rs | 0 .../src/reverse.rs | 0 .../src/set_ops.rs | 0 .../src/sort.rs | 0 .../src/string.rs | 0 .../src/utils.rs | 0 .../tests/cases/roundtrip_logical_plan.rs | 2 +- dev/release/README.md | 2 +- dev/release/crate-deps.dot | 10 ++++---- dev/release/crate-deps.svg | 22 +++++++++--------- dev/update_datafusion_versions.py | 2 +- 48 files changed, 83 insertions(+), 73 deletions(-) rename datafusion/{functions-array => functions-nested}/Cargo.toml (92%) rename datafusion/{functions-array => functions-nested}/README.md (87%) rename datafusion/{functions-array => functions-nested}/benches/array_expression.rs (95%) rename datafusion/{functions-array => functions-nested}/benches/map.rs (95%) rename datafusion/{functions-array => functions-nested}/src/array_has.rs (100%) rename datafusion/{functions-array => functions-nested}/src/cardinality.rs (100%) rename datafusion/{functions-array => functions-nested}/src/concat.rs (100%) rename datafusion/{functions-array => functions-nested}/src/dimension.rs (100%) rename datafusion/{functions-array => functions-nested}/src/empty.rs (100%) rename datafusion/{functions-array => functions-nested}/src/except.rs (100%) rename 
datafusion/{functions-array => functions-nested}/src/expr_ext.rs (95%) rename datafusion/{functions-array => functions-nested}/src/extract.rs (100%) rename datafusion/{functions-array => functions-nested}/src/flatten.rs (100%) rename datafusion/{functions-array => functions-nested}/src/length.rs (100%) rename datafusion/{functions-array => functions-nested}/src/lib.rs (93%) rename datafusion/{functions-array => functions-nested}/src/macros.rs (100%) rename datafusion/{functions-array => functions-nested}/src/make_array.rs (100%) rename datafusion/{functions-array => functions-nested}/src/map.rs (100%) rename datafusion/{functions-array => functions-nested}/src/planner.rs (97%) rename datafusion/{functions-array => functions-nested}/src/position.rs (100%) rename datafusion/{functions-array => functions-nested}/src/range.rs (100%) rename datafusion/{functions-array => functions-nested}/src/remove.rs (100%) rename datafusion/{functions-array => functions-nested}/src/repeat.rs (100%) rename datafusion/{functions-array => functions-nested}/src/replace.rs (100%) rename datafusion/{functions-array => functions-nested}/src/resize.rs (100%) rename datafusion/{functions-array => functions-nested}/src/reverse.rs (100%) rename datafusion/{functions-array => functions-nested}/src/set_ops.rs (100%) rename datafusion/{functions-array => functions-nested}/src/sort.rs (100%) rename datafusion/{functions-array => functions-nested}/src/string.rs (100%) rename datafusion/{functions-array => functions-nested}/src/utils.rs (100%) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2ddeebbc558e5..4a41fd542e5d4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -90,8 +90,8 @@ jobs: # Ensure that the datafusion crate can be built with only a subset of the function # packages enabled. 
- - name: Check datafusion (array_expressions) - run: cargo check --no-default-features --features=array_expressions -p datafusion + - name: Check datafusion (nested_expressions) + run: cargo check --no-default-features --features=nested_expressions -p datafusion - name: Check datafusion (crypto) run: cargo check --no-default-features --features=crypto_expressions -p datafusion diff --git a/Cargo.toml b/Cargo.toml index 24bde78b3001b..cb27a8761a8e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ members = [ "datafusion/execution", "datafusion/functions-aggregate", "datafusion/functions", - "datafusion/functions-array", + "datafusion/functions-nested", "datafusion/optimizer", "datafusion/physical-expr-common", "datafusion/physical-expr", @@ -94,7 +94,7 @@ datafusion-execution = { path = "datafusion/execution", version = "40.0.0" } datafusion-expr = { path = "datafusion/expr", version = "40.0.0" } datafusion-functions = { path = "datafusion/functions", version = "40.0.0" } datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" } -datafusion-functions-array = { path = "datafusion/functions-array", version = "40.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" } datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false } diff --git a/README.md b/README.md index 197e5d2b3fe16..b1d38b61109fe 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`. Default features: -- `array_expressions`: functions for working with arrays such as `array_to_string` +- `nested_expressions`: functions for working with nested type function such as `array_to_string` - `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd` - `crypto_expressions`: cryptographic functions such as `md5` and `sha256` - `datetime_expressions`: date and time functions such as `to_timestamp` diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 84bff8c87190a..a4e87f99b5c37 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1149,7 +1149,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "datafusion-functions-array", + "datafusion-functions-nested", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1315,7 +1315,7 @@ dependencies = [ ] [[package]] -name = "datafusion-functions-array" +name = "datafusion-functions-nested" version = "40.0.0" dependencies = [ "arrow", diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 4301396b231fe..bed9265ff0163 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -40,15 +40,17 @@ name = "datafusion" path = "src/lib.rs" [features] +nested_expressions = ["datafusion-functions-nested"] +# This feature is deprecated. Use the `nested_expressions` feature instead. 
+array_expressions = ["nested_expressions"] # Used to enable the avro format -array_expressions = ["datafusion-functions-array"] avro = ["apache-avro", "num-traits", "datafusion-common/avro"] backtrace = ["datafusion-common/backtrace"] compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"] crypto_expressions = ["datafusion-functions/crypto_expressions"] datetime_expressions = ["datafusion-functions/datetime_expressions"] default = [ - "array_expressions", + "nested_expressions", "crypto_expressions", "datetime_expressions", "encoding_expressions", @@ -102,7 +104,7 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } -datafusion-functions-array = { workspace = true, optional = true } +datafusion-functions-nested = { workspace = true, optional = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } @@ -221,4 +223,4 @@ name = "parquet_statistic" [[bench]] harness = false name = "map_query_sql" -required-features = ["array_expressions"] +required-features = ["nested_expressions"] diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs index b6ac8b6b647a1..e4c5f7c5deb3b 100644 --- a/datafusion/core/benches/map_query_sql.rs +++ b/datafusion/core/benches/map_query_sql.rs @@ -27,7 +27,7 @@ use tokio::runtime::Runtime; use datafusion::prelude::SessionContext; use datafusion_common::ScalarValue; use datafusion_expr::Expr; -use datafusion_functions_array::map::map; +use datafusion_functions_nested::map::map; mod data_utils; diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index 0b0465e446054..b7e7b5f0955f1 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -26,8 +26,8 @@ use crate::datasource::file_format::parquet::ParquetFormatFactory; use crate::datasource::file_format::FileFormatFactory; use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory}; use crate::execution::context::SessionState; -#[cfg(feature = "array_expressions")] -use crate::functions_array; +#[cfg(feature = "nested_expressions")] +use crate::functions_nested; use crate::{functions, functions_aggregate}; use datafusion_execution::config::SessionConfig; use datafusion_execution::object_store::ObjectStoreUrl; @@ -82,11 +82,11 @@ impl SessionStateDefaults { pub fn default_expr_planners() -> Vec> { let expr_planners: Vec> = vec![ Arc::new(functions::core::planner::CoreFunctionPlanner::default()), - // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::ArrayFunctionPlanner), - #[cfg(feature = "array_expressions")] - Arc::new(functions_array::planner::FieldAccessPlanner), + // register crate of nested expressions (if enabled) + #[cfg(feature = "nested_expressions")] + Arc::new(functions_nested::planner::NestedFunctionPlanner), + #[cfg(feature = "nested_expressions")] + Arc::new(functions_nested::planner::FieldAccessPlanner), #[cfg(any( feature = "datetime_expressions", feature = "unicode_expressions" @@ -100,8 +100,8 @@ impl SessionStateDefaults { /// returns the list of default [`ScalarUDF']'s pub fn default_scalar_functions() -> Vec> { let mut functions: Vec> = 
functions::all_default_functions(); - #[cfg(feature = "array_expressions")] - functions.append(&mut functions_array::all_default_array_functions()); + #[cfg(feature = "nested_expressions")] + functions.append(&mut functions_nested::all_default_nested_functions()); functions } @@ -140,8 +140,9 @@ impl SessionStateDefaults { /// registers all the builtin array functions pub fn register_array_functions(state: &mut SessionState) { // register crate of array expressions (if enabled) - #[cfg(feature = "array_expressions")] - functions_array::register_all(state).expect("can not register array expressions"); + #[cfg(feature = "nested_expressions")] + functions_nested::register_all(state) + .expect("can not register nested expressions"); } /// registers all the builtin aggregate functions diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 9b9b1db8ff817..9ab6ed527d82b 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -458,7 +458,7 @@ //! * [datafusion_execution]: State and structures needed for execution //! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure //! * [datafusion_functions]: Scalar function packages -//! * [datafusion_functions_array]: Scalar function packages for `ARRAY`s +//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s //! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s //! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions //! * [datafusion_physical_plan]: [`ExecutionPlan`] and related expressions @@ -569,10 +569,17 @@ pub mod functions { pub use datafusion_functions::*; } -/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled +/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled +pub mod functions_nested { + #[cfg(feature = "nested_expressions")] + pub use datafusion_functions_nested::*; +} + +/// re-export of [`datafusion_functions_nested`] crate as [`functions_array`] for backward compatibility, if "nested_expressions" feature is enabled +#[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")] pub mod functions_array { - #[cfg(feature = "array_expressions")] - pub use datafusion_functions_array::*; + #[cfg(feature = "nested_expressions")] + pub use datafusion_functions_nested::*; } /// re-export of [`datafusion_functions_aggregate`] crate diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index d82a5a2cc1a11..9c9fcd04bf09a 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -39,8 +39,8 @@ pub use datafusion_expr::{ Expr, }; pub use datafusion_functions::expr_fn::*; -#[cfg(feature = "array_expressions")] -pub use datafusion_functions_array::expr_fn::*; +#[cfg(feature = "nested_expressions")] +pub use datafusion_functions_nested::expr_fn::*; pub use std::ops::Not; pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub}; diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index f7b02196d8ed5..7a0e9888a61c1 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -34,7 +34,7 @@ use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::expr::Alias; use datafusion_expr::ExprSchemable; use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont}; -use 
datafusion_functions_array::map::map; +use datafusion_functions_nested::map::map; fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index f36f2d539845e..37d06355d2d3f 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -25,7 +25,7 @@ use datafusion_expr::AggregateExt; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions_aggregate::first_last::first_value_udaf; use datafusion_functions_aggregate::sum::sum_udaf; -use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor}; +use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor}; use sqlparser::ast::NullTreatment; /// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan use std::sync::{Arc, OnceLock}; diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 219f6c26cf8f7..9164e89de8f9a 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -45,7 +45,7 @@ use datafusion_expr::{ LogicalPlanBuilder, OperateFunctionArg, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; -use datafusion_functions_array::range::range_udf; +use datafusion_functions_nested::range::range_udf; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 8d460bdc8e7db..bf2bfe2c39326 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -42,7 +42,7 @@ pub use order_by::rewrite_sort_cols_by_aggs; /// /// For example, concatenating arrays `a || b` is represented as /// `Operator::ArrowAt`, but can be implemented by calling a function -/// `array_concat` from the `functions-array` crate. +/// `array_concat` from the `functions-nested` crate. // This is not used in datafusion internally, but it is still helpful for downstream project so don't remove it. pub trait FunctionRewrite { /// Return a human readable name for this rewrite diff --git a/datafusion/functions-array/Cargo.toml b/datafusion/functions-nested/Cargo.toml similarity index 92% rename from datafusion/functions-array/Cargo.toml rename to datafusion/functions-nested/Cargo.toml index de424b259694c..6a1973ecfed17 100644 --- a/datafusion/functions-array/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -16,8 +16,8 @@ # under the License. 
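Because only the crate, feature, and module names change, downstream code keeps the same function surface; imports simply move from `datafusion_functions_array` to `datafusion_functions_nested`, with the old paths retained as deprecated aliases. A brief sketch of the new import paths using invented expressions (going through the `datafusion` facade instead would require the default `nested_expressions` feature):

    use datafusion_expr::{lit, Expr};
    use datafusion_functions_nested::expr_fn::{array_to_string, make_array};

    fn rendered_tags() -> Expr {
        // Build [1, 2, 3] and render it as a comma separated string,
        // exactly as the old datafusion_functions_array paths did.
        let arr = make_array(vec![lit(1), lit(2), lit(3)]);
        array_to_string(arr, lit(","))
    }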
[package] -name = "datafusion-functions-array" -description = "Array Function packages for the DataFusion query engine" +name = "datafusion-functions-nested" +description = "Nested Type Function packages for the DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] readme = "README.md" version = { workspace = true } @@ -34,7 +34,7 @@ workspace = true [features] [lib] -name = "datafusion_functions_array" +name = "datafusion_functions_nested" path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/datafusion/functions-array/README.md b/datafusion/functions-nested/README.md similarity index 87% rename from datafusion/functions-array/README.md rename to datafusion/functions-nested/README.md index 25deca8e1c778..8a5047c838ab0 100644 --- a/datafusion/functions-array/README.md +++ b/datafusion/functions-nested/README.md @@ -17,11 +17,11 @@ under the License. --> -# DataFusion Array Function Library +# DataFusion Nested Type Function Library [DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. -This crate contains functions for working with arrays, such as `array_append` that work with +This crate contains functions for working with arrays, maps and structs, such as `array_append` that work with `ListArray`, `LargeListArray` and `FixedListArray` types from the `arrow` crate. [df]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-array/benches/array_expression.rs b/datafusion/functions-nested/benches/array_expression.rs similarity index 95% rename from datafusion/functions-array/benches/array_expression.rs rename to datafusion/functions-nested/benches/array_expression.rs index 48b829793cef5..0e3ecbc726413 100644 --- a/datafusion/functions-array/benches/array_expression.rs +++ b/datafusion/functions-nested/benches/array_expression.rs @@ -21,7 +21,7 @@ extern crate arrow; use crate::criterion::Criterion; use datafusion_expr::lit; -use datafusion_functions_array::expr_fn::{array_replace_all, make_array}; +use datafusion_functions_nested::expr_fn::{array_replace_all, make_array}; fn criterion_benchmark(c: &mut Criterion) { // Construct large arrays for benchmarking diff --git a/datafusion/functions-array/benches/map.rs b/datafusion/functions-nested/benches/map.rs similarity index 95% rename from datafusion/functions-array/benches/map.rs rename to datafusion/functions-nested/benches/map.rs index c2e0e641e80d2..c9a12eefa4fa1 100644 --- a/datafusion/functions-array/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -28,8 +28,8 @@ use std::sync::Arc; use datafusion_common::ScalarValue; use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{ColumnarValue, Expr}; -use datafusion_functions_array::map::map_udf; -use datafusion_functions_array::planner::ArrayFunctionPlanner; +use datafusion_functions_nested::map::map_udf; +use datafusion_functions_nested::planner::NestedFunctionPlanner; fn keys(rng: &mut ThreadRng) -> Vec { let mut keys = vec![]; @@ -58,7 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) { buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i])))); } - let planner = ArrayFunctionPlanner {}; + let planner = NestedFunctionPlanner {}; b.iter(|| { black_box( diff --git a/datafusion/functions-array/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs similarity index 100% rename from datafusion/functions-array/src/array_has.rs rename to 
datafusion/functions-nested/src/array_has.rs diff --git a/datafusion/functions-array/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs similarity index 100% rename from datafusion/functions-array/src/cardinality.rs rename to datafusion/functions-nested/src/cardinality.rs diff --git a/datafusion/functions-array/src/concat.rs b/datafusion/functions-nested/src/concat.rs similarity index 100% rename from datafusion/functions-array/src/concat.rs rename to datafusion/functions-nested/src/concat.rs diff --git a/datafusion/functions-array/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs similarity index 100% rename from datafusion/functions-array/src/dimension.rs rename to datafusion/functions-nested/src/dimension.rs diff --git a/datafusion/functions-array/src/empty.rs b/datafusion/functions-nested/src/empty.rs similarity index 100% rename from datafusion/functions-array/src/empty.rs rename to datafusion/functions-nested/src/empty.rs diff --git a/datafusion/functions-array/src/except.rs b/datafusion/functions-nested/src/except.rs similarity index 100% rename from datafusion/functions-array/src/except.rs rename to datafusion/functions-nested/src/except.rs diff --git a/datafusion/functions-array/src/expr_ext.rs b/datafusion/functions-nested/src/expr_ext.rs similarity index 95% rename from datafusion/functions-array/src/expr_ext.rs rename to datafusion/functions-nested/src/expr_ext.rs index 5505ef746881d..3524d62d0bc42 100644 --- a/datafusion/functions-array/src/expr_ext.rs +++ b/datafusion/functions-nested/src/expr_ext.rs @@ -35,7 +35,7 @@ use crate::extract::{array_element, array_slice}; /// /// ``` /// # use datafusion_expr::{lit, col, Expr}; -/// # use datafusion_functions_array::expr_ext::IndexAccessor; +/// # use datafusion_functions_nested::expr_ext::IndexAccessor; /// let expr = col("c1") /// .index(lit(3)); /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); @@ -65,7 +65,7 @@ impl IndexAccessor for Expr { /// /// ``` /// # use datafusion_expr::{lit, col}; -/// # use datafusion_functions_array::expr_ext::SliceAccessor; +/// # use datafusion_functions_nested::expr_ext::SliceAccessor; /// let expr = col("c1") /// .range(lit(2), lit(4)); /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); diff --git a/datafusion/functions-array/src/extract.rs b/datafusion/functions-nested/src/extract.rs similarity index 100% rename from datafusion/functions-array/src/extract.rs rename to datafusion/functions-nested/src/extract.rs diff --git a/datafusion/functions-array/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs similarity index 100% rename from datafusion/functions-array/src/flatten.rs rename to datafusion/functions-nested/src/flatten.rs diff --git a/datafusion/functions-array/src/length.rs b/datafusion/functions-nested/src/length.rs similarity index 100% rename from datafusion/functions-array/src/length.rs rename to datafusion/functions-nested/src/length.rs diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-nested/src/lib.rs similarity index 93% rename from datafusion/functions-array/src/lib.rs rename to datafusion/functions-nested/src/lib.rs index f68f59dcd6a12..ef2c5e709bc16 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -17,9 +17,9 @@ // Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] -//! Array Functions for [DataFusion]. +//! Nested type Functions for [DataFusion]. //! -//! 
This crate contains a collection of array functions implemented using the +//! This crate contains a collection of nested type functions implemented using the //! extension API. //! //! [DataFusion]: https://crates.io/crates/datafusion @@ -102,8 +102,8 @@ pub mod expr_fn { pub use super::string::string_to_array; } -/// Return all default array functions -pub fn all_default_array_functions() -> Vec> { +/// Return all default nested type functions +pub fn all_default_nested_functions() -> Vec> { vec![ string::array_to_string_udf(), string::string_to_array_udf(), @@ -148,7 +148,7 @@ pub fn all_default_array_functions() -> Vec> { /// Registers all enabled packages with a [`FunctionRegistry`] pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { - let functions: Vec> = all_default_array_functions(); + let functions: Vec> = all_default_nested_functions(); functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; if let Some(existing_udf) = existing_udf { @@ -162,14 +162,14 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { #[cfg(test)] mod tests { - use crate::all_default_array_functions; + use crate::all_default_nested_functions; use datafusion_common::Result; use std::collections::HashSet; #[test] fn test_no_duplicate_name() -> Result<()> { let mut names = HashSet::new(); - for func in all_default_array_functions() { + for func in all_default_nested_functions() { assert!( names.insert(func.name().to_string().to_lowercase()), "duplicate function name: {}", diff --git a/datafusion/functions-array/src/macros.rs b/datafusion/functions-nested/src/macros.rs similarity index 100% rename from datafusion/functions-array/src/macros.rs rename to datafusion/functions-nested/src/macros.rs diff --git a/datafusion/functions-array/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs similarity index 100% rename from datafusion/functions-array/src/make_array.rs rename to datafusion/functions-nested/src/make_array.rs diff --git a/datafusion/functions-array/src/map.rs b/datafusion/functions-nested/src/map.rs similarity index 100% rename from datafusion/functions-array/src/map.rs rename to datafusion/functions-nested/src/map.rs diff --git a/datafusion/functions-array/src/planner.rs b/datafusion/functions-nested/src/planner.rs similarity index 97% rename from datafusion/functions-array/src/planner.rs rename to datafusion/functions-nested/src/planner.rs index 3f779c9f111ed..97c54cc77bebd 100644 --- a/datafusion/functions-array/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! SQL planning extensions like [`ArrayFunctionPlanner`] and [`FieldAccessPlanner`] +//! 
SQL planning extensions like [`NestedFunctionPlanner`] and [`FieldAccessPlanner`] use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result}; use datafusion_expr::expr::ScalarFunction; @@ -35,9 +35,9 @@ use crate::{ make_array::make_array, }; -pub struct ArrayFunctionPlanner; +pub struct NestedFunctionPlanner; -impl ExprPlanner for ArrayFunctionPlanner { +impl ExprPlanner for NestedFunctionPlanner { fn plan_binary_op( &self, expr: RawBinaryExpr, diff --git a/datafusion/functions-array/src/position.rs b/datafusion/functions-nested/src/position.rs similarity index 100% rename from datafusion/functions-array/src/position.rs rename to datafusion/functions-nested/src/position.rs diff --git a/datafusion/functions-array/src/range.rs b/datafusion/functions-nested/src/range.rs similarity index 100% rename from datafusion/functions-array/src/range.rs rename to datafusion/functions-nested/src/range.rs diff --git a/datafusion/functions-array/src/remove.rs b/datafusion/functions-nested/src/remove.rs similarity index 100% rename from datafusion/functions-array/src/remove.rs rename to datafusion/functions-nested/src/remove.rs diff --git a/datafusion/functions-array/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs similarity index 100% rename from datafusion/functions-array/src/repeat.rs rename to datafusion/functions-nested/src/repeat.rs diff --git a/datafusion/functions-array/src/replace.rs b/datafusion/functions-nested/src/replace.rs similarity index 100% rename from datafusion/functions-array/src/replace.rs rename to datafusion/functions-nested/src/replace.rs diff --git a/datafusion/functions-array/src/resize.rs b/datafusion/functions-nested/src/resize.rs similarity index 100% rename from datafusion/functions-array/src/resize.rs rename to datafusion/functions-nested/src/resize.rs diff --git a/datafusion/functions-array/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs similarity index 100% rename from datafusion/functions-array/src/reverse.rs rename to datafusion/functions-nested/src/reverse.rs diff --git a/datafusion/functions-array/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs similarity index 100% rename from datafusion/functions-array/src/set_ops.rs rename to datafusion/functions-nested/src/set_ops.rs diff --git a/datafusion/functions-array/src/sort.rs b/datafusion/functions-nested/src/sort.rs similarity index 100% rename from datafusion/functions-array/src/sort.rs rename to datafusion/functions-nested/src/sort.rs diff --git a/datafusion/functions-array/src/string.rs b/datafusion/functions-nested/src/string.rs similarity index 100% rename from datafusion/functions-array/src/string.rs rename to datafusion/functions-nested/src/string.rs diff --git a/datafusion/functions-array/src/utils.rs b/datafusion/functions-nested/src/utils.rs similarity index 100% rename from datafusion/functions-array/src/utils.rs rename to datafusion/functions-nested/src/utils.rs diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index e17515086ecd9..25223c3731bef 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -44,7 +44,7 @@ use datafusion::functions_aggregate::expr_fn::{ count_distinct, covar_pop, covar_samp, first_value, grouping, median, stddev, stddev_pop, sum, var_pop, var_sample, }; -use datafusion::functions_array::map::map; +use datafusion::functions_nested::map::map; use datafusion::prelude::*; use 
datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::config::TableOptions; diff --git a/dev/release/README.md b/dev/release/README.md index 9486222c51053..c6bc9be2b0db7 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -268,7 +268,7 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg (cd datafusion/functions-aggregate && cargo publish) (cd datafusion/physical-expr && cargo publish) (cd datafusion/functions && cargo publish) -(cd datafusion/functions-array && cargo publish) +(cd datafusion/functions-nested && cargo publish) (cd datafusion/sql && cargo publish) (cd datafusion/optimizer && cargo publish) (cd datafusion/common-runtime && cargo publish) diff --git a/dev/release/crate-deps.dot b/dev/release/crate-deps.dot index 69811c7d61099..1d903a56021d1 100644 --- a/dev/release/crate-deps.dot +++ b/dev/release/crate-deps.dot @@ -74,15 +74,15 @@ digraph G { datafusion -> datafusion_execution datafusion -> datafusion_expr datafusion -> datafusion_functions - datafusion -> datafusion_functions_array + datafusion -> datafusion_functions_nested datafusion -> datafusion_optimizer datafusion -> datafusion_physical_expr datafusion -> datafusion_physical_plan datafusion -> datafusion_sql - datafusion_functions_array - datafusion_functions_array -> datafusion_common - datafusion_functions_array -> datafusion_execution - datafusion_functions_array -> datafusion_expr + datafusion_functions_nested + datafusion_functions_nested -> datafusion_common + datafusion_functions_nested -> datafusion_execution + datafusion_functions_nested -> datafusion_expr datafusion_execution datafusion_execution -> datafusion_common datafusion_execution -> datafusion_expr diff --git a/dev/release/crate-deps.svg b/dev/release/crate-deps.svg index cf60bf7526420..c76fe3abb4acd 100644 --- a/dev/release/crate-deps.svg +++ b/dev/release/crate-deps.svg @@ -153,15 +153,15 @@ - + -datafusion_functions_array +datafusion_functions_nested -datafusion_functions_array +datafusion_functions_nested - + -datafusion->datafusion_functions_array +datafusion->datafusion_functions_nested @@ -411,21 +411,21 @@ - + -datafusion_functions_array->datafusion_common +datafusion_functions_nested->datafusion_common - + -datafusion_functions_array->datafusion_expr +datafusion_functions_nested->datafusion_expr - + -datafusion_functions_array->datafusion_execution +datafusion_functions_nested->datafusion_execution diff --git a/dev/update_datafusion_versions.py b/dev/update_datafusion_versions.py index 74a8a2ebd5b60..2e3374cd920b6 100755 --- a/dev/update_datafusion_versions.py +++ b/dev/update_datafusion_versions.py @@ -35,7 +35,7 @@ 'datafusion-expr': 'datafusion/expr/Cargo.toml', 'datafusion-functions': 'datafusion/functions/Cargo.toml', 'datafusion-functions-aggregate': 'datafusion/functions-aggregate/Cargo.toml', - 'datafusion-functions-array': 'datafusion/functions-array/Cargo.toml', + 'datafusion-functions-nested': 'datafusion/functions-nested/Cargo.toml', 'datafusion-optimizer': 'datafusion/optimizer/Cargo.toml', 'datafusion-physical-expr': 'datafusion/physical-expr/Cargo.toml', 'datafusion-physical-expr-common': 'datafusion/physical-expr-common/Cargo.toml', From 8945462ed0baf20eb4fb8e298407d08072030e33 Mon Sep 17 00:00:00 2001 From: Namgung Chan <33323415+getChan@users.noreply.github.com> Date: Wed, 24 Jul 2024 22:11:00 +0900 Subject: [PATCH 133/357] Fix : `signum` function bug when `0.0` input (#11580) * add signum unit test * fix: signum function implementation - input zero output zero * 
fix: run cargo fmt * fix: not specified return type is float64 * fix: sqllogictest --- datafusion/functions/src/math/mod.rs | 3 +- datafusion/functions/src/math/monotonicity.rs | 5 - datafusion/functions/src/math/signum.rs | 215 ++++++++++++++++++ datafusion/sqllogictest/test_files/scalar.slt | 2 +- 4 files changed, 218 insertions(+), 7 deletions(-) create mode 100644 datafusion/functions/src/math/signum.rs diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 9ee173bb61762..3b32a158b8848 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -35,6 +35,7 @@ pub mod pi; pub mod power; pub mod random; pub mod round; +pub mod signum; pub mod trunc; // Create UDFs @@ -81,7 +82,7 @@ make_math_unary_udf!( ); make_udf_function!(random::RandomFunc, RANDOM, random); make_udf_function!(round::RoundFunc, ROUND, round); -make_math_unary_udf!(SignumFunc, SIGNUM, signum, signum, super::signum_order); +make_udf_function!(signum::SignumFunc, SIGNUM, signum); make_math_unary_udf!(SinFunc, SIN, sin, sin, super::sin_order); make_math_unary_udf!(SinhFunc, SINH, sinh, sinh, super::sinh_order); make_math_unary_udf!(SqrtFunc, SQRT, sqrt, sqrt, super::sqrt_order); diff --git a/datafusion/functions/src/math/monotonicity.rs b/datafusion/functions/src/math/monotonicity.rs index 56c5a45788bc8..33c061ee11d0d 100644 --- a/datafusion/functions/src/math/monotonicity.rs +++ b/datafusion/functions/src/math/monotonicity.rs @@ -197,11 +197,6 @@ pub fn radians_order(input: &[ExprProperties]) -> Result { Ok(input[0].sort_properties) } -/// Non-decreasing for all real numbers x. -pub fn signum_order(input: &[ExprProperties]) -> Result { - Ok(input[0].sort_properties) -} - /// Non-decreasing on \[0, π\] and then non-increasing on \[π, 2π\]. /// This pattern repeats periodically with a period of 2π. // TODO: Implement ordering rule of the SIN function. diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs new file mode 100644 index 0000000000000..d2a806a46e136 --- /dev/null +++ b/datafusion/functions/src/math/signum.rs @@ -0,0 +1,215 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, Float32Array, Float64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Float32, Float64}; + +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +use crate::utils::make_scalar_function; + +#[derive(Debug)] +pub struct SignumFunc { + signature: Signature, +} + +impl Default for SignumFunc { + fn default() -> Self { + SignumFunc::new() + } +} + +impl SignumFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform( + 1, + vec![Float64, Float32], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SignumFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "signum" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + match &arg_types[0] { + Float32 => Ok(Float32), + _ => Ok(Float64), + } + } + + fn output_ordering(&self, input: &[ExprProperties]) -> Result { + // Non-decreasing for all real numbers x. + Ok(input[0].sort_properties) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(signum, vec![])(args) + } +} + +/// signum SQL function +pub fn signum(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + "signum", + Float64Array, + Float64Array, + { + |x: f64| { + if x == 0_f64 { + 0_f64 + } else { + x.signum() + } + } + } + )) as ArrayRef), + + Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + "signum", + Float32Array, + Float32Array, + { + |x: f32| { + if x == 0_f32 { + 0_f32 + } else { + x.signum() + } + } + } + )) as ArrayRef), + + other => exec_err!("Unsupported data type {other:?} for function signum"), + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use arrow::array::{Float32Array, Float64Array}; + + use datafusion_common::cast::{as_float32_array, as_float64_array}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use crate::math::signum::SignumFunc; + + #[test] + fn test_signum_f32() { + let args = [ColumnarValue::Array(Arc::new(Float32Array::from(vec![ + -1.0, + -0.0, + 0.0, + 1.0, + -0.01, + 0.01, + f32::NAN, + f32::INFINITY, + f32::NEG_INFINITY, + ])))]; + + let result = SignumFunc::new() + .invoke(&args) + .expect("failed to initialize function signum"); + + match result { + ColumnarValue::Array(arr) => { + let floats = as_float32_array(&arr) + .expect("failed to convert result to a Float32Array"); + + assert_eq!(floats.len(), 9); + assert_eq!(floats.value(0), -1.0); + assert_eq!(floats.value(1), 0.0); + assert_eq!(floats.value(2), 0.0); + assert_eq!(floats.value(3), 1.0); + assert_eq!(floats.value(4), -1.0); + assert_eq!(floats.value(5), 1.0); + assert!(floats.value(6).is_nan()); + assert_eq!(floats.value(7), 1.0); + assert_eq!(floats.value(8), -1.0); + } + ColumnarValue::Scalar(_) => { + panic!("Expected an array value") + } + } + } + + #[test] + fn test_signum_f64() { + let args = [ColumnarValue::Array(Arc::new(Float64Array::from(vec![ + -1.0, + -0.0, + 0.0, + 1.0, + -0.01, + 0.01, + f64::NAN, + f64::INFINITY, + f64::NEG_INFINITY, + ])))]; + + let result = SignumFunc::new() + .invoke(&args) + .expect("failed to initialize function signum"); + + match result { + 
ColumnarValue::Array(arr) => { + let floats = as_float64_array(&arr) + .expect("failed to convert result to a Float32Array"); + + assert_eq!(floats.len(), 9); + assert_eq!(floats.value(0), -1.0); + assert_eq!(floats.value(1), 0.0); + assert_eq!(floats.value(2), 0.0); + assert_eq!(floats.value(3), 1.0); + assert_eq!(floats.value(4), -1.0); + assert_eq!(floats.value(5), 1.0); + assert!(floats.value(6).is_nan()); + assert_eq!(floats.value(7), 1.0); + assert_eq!(floats.value(8), -1.0); + } + ColumnarValue::Scalar(_) => { + panic!("Expected an array value") + } + } + } +} diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index ff9afa94f40af..188a2c5863e6f 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -794,7 +794,7 @@ select round(column1, column2) from values (3.14, 2), (3.14, 3), (3.14, 21474836 query RRR rowsort select signum(-2), signum(0), signum(2); ---- --1 1 1 +-1 0 1 # signum scalar nulls query R rowsort From 6efdbe6d4b8df4ef8c149f42e57d9c3aed7f3266 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 24 Jul 2024 18:42:17 +0530 Subject: [PATCH 134/357] Enforce uniqueness of `named_struct` field names (#11614) * check struct field names for uniqueness * add logic test * improve error log --- datafusion/functions/src/core/named_struct.rs | 15 ++++++++++++++- datafusion/sqllogictest/test_files/struct.slt | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 8ccda977f3a42..f71b1b00f0fe6 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -20,6 +20,7 @@ use arrow::datatypes::{DataType, Field, Fields}; use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use hashbrown::HashSet; use std::any::Any; use std::sync::Arc; @@ -45,7 +46,6 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { .map(|(i, chunk)| { let name_column = &chunk[0]; - let name = match name_column { ColumnarValue::Scalar(ScalarValue::Utf8(Some(name_scalar))) => name_scalar, _ => return exec_err!("named_struct even arguments must be string literals, got {name_column:?} instead at position {}", i * 2) @@ -57,6 +57,19 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { .into_iter() .unzip(); + { + // Check to enforce the uniqueness of struct field name + let mut unique_field_names = HashSet::new(); + for name in names.iter() { + if unique_field_names.contains(name) { + return exec_err!( + "named_struct requires unique field names. Field {name} is used more than once." 
+ ); + } + unique_field_names.insert(name); + } + } + let arrays = ColumnarValue::values_to_arrays(&values)?; let fields = names diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index a7384fd4d8ad6..caa612f556fed 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -122,6 +122,10 @@ physical_plan query error select named_struct(); +# error on duplicate field names +query error +select named_struct('c0': 1, 'c1': 2, 'c1': 3); + # error on odd number of arguments #1 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead select named_struct('a'); From e90b3ac5cf89ec5b1a94506ac69e85bd9b7d319e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 24 Jul 2024 10:20:18 -0400 Subject: [PATCH 135/357] Minor: unecessary row_count calculation in `CrossJoinExec` and `NestedLoopsJoinExec` (#11632) * Minor: remove row_count calculation * Minor: remove row_count calculation --- .../physical-plan/src/joins/cross_join.rs | 31 ++++++++----------- .../src/joins/nested_loop_join.rs | 14 ++++----- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index b1482a9699d56..2840d3f62bf93 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -154,24 +154,19 @@ async fn load_left_input( let stream = merge.execute(0, context)?; // Load all batches and count the rows - let (batches, _num_rows, _, reservation) = stream - .try_fold( - (Vec::new(), 0usize, metrics, reservation), - |mut acc, batch| async { - let batch_size = batch.get_array_memory_size(); - // Reserve memory for incoming batch - acc.3.try_grow(batch_size)?; - // Update metrics - acc.2.build_mem_used.add(batch_size); - acc.2.build_input_batches.add(1); - acc.2.build_input_rows.add(batch.num_rows()); - // Update rowcount - acc.1 += batch.num_rows(); - // Push batch to output - acc.0.push(batch); - Ok(acc) - }, - ) + let (batches, _metrics, reservation) = stream + .try_fold((Vec::new(), metrics, reservation), |mut acc, batch| async { + let batch_size = batch.get_array_memory_size(); + // Reserve memory for incoming batch + acc.2.try_grow(batch_size)?; + // Update metrics + acc.1.build_mem_used.add(batch_size); + acc.1.build_input_batches.add(1); + acc.1.build_input_rows.add(batch.num_rows()); + // Push batch to output + acc.0.push(batch); + Ok(acc) + }) .await?; let merged_batch = concat_batches(&left_schema, &batches)?; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index eac135bfd0fe3..9f1465c2d7c1a 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -364,19 +364,17 @@ async fn collect_left_input( let stream = merge.execute(0, context)?; // Load all batches and count the rows - let (batches, _num_rows, metrics, mut reservation) = stream + let (batches, metrics, mut reservation) = stream .try_fold( - (Vec::new(), 0usize, join_metrics, reservation), + (Vec::new(), join_metrics, reservation), |mut acc, batch| async { let batch_size = batch.get_array_memory_size(); // Reserve memory for incoming batch - acc.3.try_grow(batch_size)?; + acc.2.try_grow(batch_size)?; // Update metrics - acc.2.build_mem_used.add(batch_size); - acc.2.build_input_batches.add(1); - 
acc.2.build_input_rows.add(batch.num_rows()); - // Update rowcount - acc.1 += batch.num_rows(); + acc.1.build_mem_used.add(batch_size); + acc.1.build_input_batches.add(1); + acc.1.build_input_rows.add(batch.num_rows()); // Push batch to output acc.0.push(batch); Ok(acc) From 13569340bce99e4a317ec4d71e5c46d69dfa733d Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Wed, 24 Jul 2024 22:30:05 +0800 Subject: [PATCH 136/357] ExprBuilder for Physical Aggregate Expr (#11617) * aggregate expr builder Signed-off-by: jayzhan211 * replace parts of test Signed-off-by: jayzhan211 * continue Signed-off-by: jayzhan211 * cleanup all Signed-off-by: jayzhan211 * clipp Signed-off-by: jayzhan211 * add sort Signed-off-by: jayzhan211 * rm field Signed-off-by: jayzhan211 * address comment Signed-off-by: jayzhan211 * fix import path Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/core/src/lib.rs | 5 + .../aggregate_statistics.rs | 20 +- .../combine_partial_final_agg.rs | 41 +-- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 23 +- .../physical-expr-common/src/aggregate/mod.rs | 286 +++++++++++++----- .../physical-plan/src/aggregates/mod.rs | 134 +++----- datafusion/physical-plan/src/windows/mod.rs | 39 +-- datafusion/proto/src/physical_plan/mod.rs | 11 +- .../tests/cases/roundtrip_physical_plan.rs | 177 ++++------- 9 files changed, 369 insertions(+), 367 deletions(-) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 9ab6ed527d82b..d9ab9e1c07dd8 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -545,6 +545,11 @@ pub mod optimizer { pub use datafusion_optimizer::*; } +/// re-export of [`datafusion_physical_expr`] crate +pub mod physical_expr_common { + pub use datafusion_physical_expr_common::*; +} + /// re-export of [`datafusion_physical_expr`] crate pub mod physical_expr { pub use datafusion_physical_expr::*; diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index e7580d3e33ef2..5f08e4512b3a0 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -326,7 +326,7 @@ pub(crate) mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::expressions::cast; use datafusion_physical_expr::PhysicalExpr; - use datafusion_physical_expr_common::aggregate::create_aggregate_expr; + use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; use datafusion_physical_plan::aggregates::AggregateMode; /// Mock data using a MemoryExec which has an exact count statistic @@ -419,19 +419,11 @@ pub(crate) mod tests { // Return appropriate expr depending if COUNT is for col or table (*) pub(crate) fn count_expr(&self, schema: &Schema) -> Arc { - create_aggregate_expr( - &count_udaf(), - &[self.column()], - &[], - &[], - &[], - schema, - self.column_name(), - false, - false, - false, - ) - .unwrap() + AggregateExprBuilder::new(count_udaf(), vec![self.column()]) + .schema(Arc::new(schema.clone())) + .name(self.column_name()) + .build() + .unwrap() } /// what argument would this aggregate need in the plan? 
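The hunks above migrate call sites from the old ten-argument `create_aggregate_expr` to the new `AggregateExprBuilder`. For reference, a minimal self-contained sketch of the resulting call pattern (illustrative only, not part of this diff; the crate paths, `count_udaf`, and `col` are taken from the hunks in this patch series, while the `example_count_expr` wrapper is hypothetical):

use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_functions_aggregate::count::count_udaf;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr::AggregateExpr;
use datafusion_physical_expr_common::aggregate::AggregateExprBuilder;

// Build a physical COUNT(a) expression with the builder; only the schema and
// display name are set explicitly, while ordering, distinct and ignore-nulls
// keep the builder defaults.
fn example_count_expr() -> Result<Arc<dyn AggregateExpr>> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)]));
    AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema)?])
        .schema(Arc::clone(&schema))
        .name("COUNT(a)")
        .build()
}

Optional flags are chained instead of being passed positionally, e.g. `.distinct()` and `.ignore_nulls()` as in the proto round-trip test changes later in this patch.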
diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index ddb7d36fb5950..6f3274820c8c1 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -177,7 +177,7 @@ mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_physical_expr::expressions::col; - use datafusion_physical_plan::udaf::create_aggregate_expr; + use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! assert_optimized { @@ -278,19 +278,11 @@ mod tests { name: &str, schema: &Schema, ) -> Arc { - create_aggregate_expr( - &count_udaf(), - &[expr], - &[], - &[], - &[], - schema, - name, - false, - false, - false, - ) - .unwrap() + AggregateExprBuilder::new(count_udaf(), vec![expr]) + .schema(Arc::new(schema.clone())) + .name(name) + .build() + .unwrap() } #[test] @@ -368,19 +360,14 @@ mod tests { #[test] fn aggregations_with_group_combined() -> Result<()> { let schema = schema(); - - let aggr_expr = vec![create_aggregate_expr( - &sum_udaf(), - &[col("b", &schema)?], - &[], - &[], - &[], - &schema, - "Sum(b)", - false, - false, - false, - )?]; + let aggr_expr = + vec![ + AggregateExprBuilder::new(sum_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .name("Sum(b)") + .build() + .unwrap(), + ]; let groups: Vec<(Arc, String)> = vec![(col("c", &schema)?, "c".to_string())]; diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 736560da97db9..6f286c9aeba1e 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -35,7 +35,7 @@ use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor} use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; -use datafusion_physical_plan::udaf::create_aggregate_expr; +use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; use datafusion_physical_plan::InputOrderMode; use test_utils::{add_empty_batches, StringBatchGenerator}; @@ -103,19 +103,14 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str .with_sort_information(vec![sort_keys]), ); - let aggregate_expr = vec![create_aggregate_expr( - &sum_udaf(), - &[col("d", &schema).unwrap()], - &[], - &[], - &[], - &schema, - "sum1", - false, - false, - false, - ) - .unwrap()]; + let aggregate_expr = + vec![ + AggregateExprBuilder::new(sum_udaf(), vec![col("d", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .name("sum1") + .build() + .unwrap(), + ]; let expr = group_by_columns .iter() .map(|elem| (col(elem, &schema).unwrap(), elem.to_string())) diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 8c5f9f9e5a7e1..b58a5a6faf242 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -22,8 +22,8 @@ pub mod stats; pub mod tdigest; pub mod utils; -use arrow::datatypes::{DataType, Field, Schema}; -use datafusion_common::{not_impl_err, DFSchema, Result}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use 
datafusion_common::{internal_err, not_impl_err, DFSchema, Result}; use datafusion_expr::function::StateFieldsArgs; use datafusion_expr::type_coercion::aggregates::check_arg_count; use datafusion_expr::ReversedUDAF; @@ -33,7 +33,7 @@ use datafusion_expr::{ use std::fmt::Debug; use std::{any::Any, sync::Arc}; -use self::utils::{down_cast_any_ref, ordering_fields}; +use self::utils::down_cast_any_ref; use crate::physical_expr::PhysicalExpr; use crate::sort_expr::{LexOrdering, PhysicalSortExpr}; use crate::utils::reverse_order_bys; @@ -55,6 +55,8 @@ use datafusion_expr::utils::AggregateOrderSensitivity; /// `is_reversed` is used to indicate whether the aggregation is running in reverse order, /// it could be used to hint Accumulator to accumulate in the reversed order, /// you can just set to false if you are not reversing expression +/// +/// You can also create expression by [`AggregateExprBuilder`] #[allow(clippy::too_many_arguments)] pub fn create_aggregate_expr( fun: &AggregateUDF, @@ -66,45 +68,23 @@ pub fn create_aggregate_expr( name: impl Into, ignore_nulls: bool, is_distinct: bool, - is_reversed: bool, ) -> Result> { - debug_assert_eq!(sort_exprs.len(), ordering_req.len()); - - let input_exprs_types = input_phy_exprs - .iter() - .map(|arg| arg.data_type(schema)) - .collect::>>()?; - - check_arg_count( - fun.name(), - &input_exprs_types, - &fun.signature().type_signature, - )?; - - let ordering_types = ordering_req - .iter() - .map(|e| e.expr.data_type(schema)) - .collect::>>()?; - - let ordering_fields = ordering_fields(ordering_req, &ordering_types); - let name = name.into(); - - Ok(Arc::new(AggregateFunctionExpr { - fun: fun.clone(), - args: input_phy_exprs.to_vec(), - logical_args: input_exprs.to_vec(), - data_type: fun.return_type(&input_exprs_types)?, - name, - schema: schema.clone(), - dfschema: DFSchema::empty(), - sort_exprs: sort_exprs.to_vec(), - ordering_req: ordering_req.to_vec(), - ignore_nulls, - ordering_fields, - is_distinct, - input_type: input_exprs_types[0].clone(), - is_reversed, - })) + let mut builder = + AggregateExprBuilder::new(Arc::new(fun.clone()), input_phy_exprs.to_vec()); + builder = builder.sort_exprs(sort_exprs.to_vec()); + builder = builder.order_by(ordering_req.to_vec()); + builder = builder.logical_exprs(input_exprs.to_vec()); + builder = builder.schema(Arc::new(schema.clone())); + builder = builder.name(name); + + if ignore_nulls { + builder = builder.ignore_nulls(); + } + if is_distinct { + builder = builder.distinct(); + } + + builder.build() } #[allow(clippy::too_many_arguments)] @@ -121,44 +101,196 @@ pub fn create_aggregate_expr_with_dfschema( is_distinct: bool, is_reversed: bool, ) -> Result> { - debug_assert_eq!(sort_exprs.len(), ordering_req.len()); - + let mut builder = + AggregateExprBuilder::new(Arc::new(fun.clone()), input_phy_exprs.to_vec()); + builder = builder.sort_exprs(sort_exprs.to_vec()); + builder = builder.order_by(ordering_req.to_vec()); + builder = builder.logical_exprs(input_exprs.to_vec()); + builder = builder.dfschema(dfschema.clone()); let schema: Schema = dfschema.into(); + builder = builder.schema(Arc::new(schema)); + builder = builder.name(name); + + if ignore_nulls { + builder = builder.ignore_nulls(); + } + if is_distinct { + builder = builder.distinct(); + } + if is_reversed { + builder = builder.reversed(); + } + + builder.build() +} + +/// Builder for physical [`AggregateExpr`] +/// +/// `AggregateExpr` contains the information necessary to call +/// an aggregate expression. 
+#[derive(Debug, Clone)] +pub struct AggregateExprBuilder { + fun: Arc, + /// Physical expressions of the aggregate function + args: Vec>, + /// Logical expressions of the aggregate function, it will be deprecated in + logical_args: Vec, + name: String, + /// Arrow Schema for the aggregate function + schema: SchemaRef, + /// Datafusion Schema for the aggregate function + dfschema: DFSchema, + /// The logical order by expressions, it will be deprecated in + sort_exprs: Vec, + /// The physical order by expressions + ordering_req: LexOrdering, + /// Whether to ignore null values + ignore_nulls: bool, + /// Whether is distinct aggregate function + is_distinct: bool, + /// Whether the expression is reversed + is_reversed: bool, +} + +impl AggregateExprBuilder { + pub fn new(fun: Arc, args: Vec>) -> Self { + Self { + fun, + args, + logical_args: vec![], + name: String::new(), + schema: Arc::new(Schema::empty()), + dfschema: DFSchema::empty(), + sort_exprs: vec![], + ordering_req: vec![], + ignore_nulls: false, + is_distinct: false, + is_reversed: false, + } + } + + pub fn build(self) -> Result> { + let Self { + fun, + args, + logical_args, + name, + schema, + dfschema, + sort_exprs, + ordering_req, + ignore_nulls, + is_distinct, + is_reversed, + } = self; + if args.is_empty() { + return internal_err!("args should not be empty"); + } + + let mut ordering_fields = vec![]; + + debug_assert_eq!(sort_exprs.len(), ordering_req.len()); + if !ordering_req.is_empty() { + let ordering_types = ordering_req + .iter() + .map(|e| e.expr.data_type(&schema)) + .collect::>>()?; + + ordering_fields = utils::ordering_fields(&ordering_req, &ordering_types); + } + + let input_exprs_types = args + .iter() + .map(|arg| arg.data_type(&schema)) + .collect::>>()?; + + check_arg_count( + fun.name(), + &input_exprs_types, + &fun.signature().type_signature, + )?; - let input_exprs_types = input_phy_exprs - .iter() - .map(|arg| arg.data_type(&schema)) - .collect::>>()?; - - check_arg_count( - fun.name(), - &input_exprs_types, - &fun.signature().type_signature, - )?; - - let ordering_types = ordering_req - .iter() - .map(|e| e.expr.data_type(&schema)) - .collect::>>()?; - - let ordering_fields = ordering_fields(ordering_req, &ordering_types); - - Ok(Arc::new(AggregateFunctionExpr { - fun: fun.clone(), - args: input_phy_exprs.to_vec(), - logical_args: input_exprs.to_vec(), - data_type: fun.return_type(&input_exprs_types)?, - name: name.into(), - schema: schema.clone(), - dfschema: dfschema.clone(), - sort_exprs: sort_exprs.to_vec(), - ordering_req: ordering_req.to_vec(), - ignore_nulls, - ordering_fields, - is_distinct, - input_type: input_exprs_types[0].clone(), - is_reversed, - })) + let data_type = fun.return_type(&input_exprs_types)?; + + Ok(Arc::new(AggregateFunctionExpr { + fun: Arc::unwrap_or_clone(fun), + args, + logical_args, + data_type, + name, + schema: Arc::unwrap_or_clone(schema), + dfschema, + sort_exprs, + ordering_req, + ignore_nulls, + ordering_fields, + is_distinct, + input_type: input_exprs_types[0].clone(), + is_reversed, + })) + } + + pub fn name(mut self, name: impl Into) -> Self { + self.name = name.into(); + self + } + + pub fn schema(mut self, schema: SchemaRef) -> Self { + self.schema = schema; + self + } + + pub fn dfschema(mut self, dfschema: DFSchema) -> Self { + self.dfschema = dfschema; + self + } + + pub fn order_by(mut self, order_by: LexOrdering) -> Self { + self.ordering_req = order_by; + self + } + + pub fn reversed(mut self) -> Self { + self.is_reversed = true; + self + } + + pub fn 
with_reversed(mut self, is_reversed: bool) -> Self { + self.is_reversed = is_reversed; + self + } + + pub fn distinct(mut self) -> Self { + self.is_distinct = true; + self + } + + pub fn with_distinct(mut self, is_distinct: bool) -> Self { + self.is_distinct = is_distinct; + self + } + + pub fn ignore_nulls(mut self) -> Self { + self.ignore_nulls = true; + self + } + + pub fn with_ignore_nulls(mut self, ignore_nulls: bool) -> Self { + self.ignore_nulls = ignore_nulls; + self + } + + /// This method will be deprecated in + pub fn sort_exprs(mut self, sort_exprs: Vec) -> Self { + self.sort_exprs = sort_exprs; + self + } + + /// This method will be deprecated in + pub fn logical_exprs(mut self, logical_args: Vec) -> Self { + self.logical_args = logical_args; + self + } } /// An aggregate expression that: diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index e7cd5cb2725be..d1152038eb2a2 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1211,7 +1211,7 @@ mod tests { use crate::common::collect; use datafusion_physical_expr_common::aggregate::{ - create_aggregate_expr, create_aggregate_expr_with_dfschema, + create_aggregate_expr_with_dfschema, AggregateExprBuilder, }; use datafusion_physical_expr_common::expressions::Literal; use futures::{FutureExt, Stream}; @@ -1351,18 +1351,11 @@ mod tests { ], }; - let aggregates = vec![create_aggregate_expr( - &count_udaf(), - &[lit(1i8)], - &[datafusion_expr::lit(1i8)], - &[], - &[], - &input_schema, - "COUNT(1)", - false, - false, - false, - )?]; + let aggregates = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1i8)]) + .schema(Arc::clone(&input_schema)) + .name("COUNT(1)") + .logical_exprs(vec![datafusion_expr::lit(1i8)]) + .build()?]; let task_ctx = if spill { new_spill_ctx(4, 1000) @@ -1501,18 +1494,13 @@ mod tests { groups: vec![vec![false]], }; - let aggregates: Vec> = vec![create_aggregate_expr( - &avg_udaf(), - &[col("b", &input_schema)?], - &[datafusion_expr::col("b")], - &[], - &[], - &input_schema, - "AVG(b)", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .name("AVG(b)") + .build()?, + ]; let task_ctx = if spill { // set to an appropriate value to trigger spill @@ -1803,21 +1791,11 @@ mod tests { } // Median(a) - fn test_median_agg_expr(schema: &Schema) -> Result> { - let args = vec![col("a", schema)?]; - let fun = median_udaf(); - datafusion_physical_expr_common::aggregate::create_aggregate_expr( - &fun, - &args, - &[], - &[], - &[], - schema, - "MEDIAN(a)", - false, - false, - false, - ) + fn test_median_agg_expr(schema: SchemaRef) -> Result> { + AggregateExprBuilder::new(median_udaf(), vec![col("a", &schema)?]) + .schema(schema) + .name("MEDIAN(a)") + .build() } #[tokio::test] @@ -1840,21 +1818,16 @@ mod tests { // something that allocates within the aggregator let aggregates_v0: Vec> = - vec![test_median_agg_expr(&input_schema)?]; + vec![test_median_agg_expr(Arc::clone(&input_schema))?]; // use fast-path in `row_hash.rs`. 
- let aggregates_v2: Vec> = vec![create_aggregate_expr( - &avg_udaf(), - &[col("b", &input_schema)?], - &[datafusion_expr::col("b")], - &[], - &[], - &input_schema, - "AVG(b)", - false, - false, - false, - )?]; + let aggregates_v2: Vec> = + vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .name("AVG(b)") + .build()?, + ]; for (version, groups, aggregates) in [ (0, groups_none, aggregates_v0), @@ -1908,18 +1881,13 @@ mod tests { let groups = PhysicalGroupBy::default(); - let aggregates: Vec> = vec![create_aggregate_expr( - &avg_udaf(), - &[col("a", &schema)?], - &[datafusion_expr::col("a")], - &[], - &[], - &schema, - "AVG(a)", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) + .schema(Arc::clone(&schema)) + .name("AVG(a)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -1953,18 +1921,13 @@ mod tests { let groups = PhysicalGroupBy::new_single(vec![(col("a", &schema)?, "a".to_string())]); - let aggregates: Vec> = vec![create_aggregate_expr( - &avg_udaf(), - &[col("b", &schema)?], - &[datafusion_expr::col("b")], - &[], - &[], - &schema, - "AVG(b)", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .name("AVG(b)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -2388,18 +2351,11 @@ mod tests { ], ); - let aggregates: Vec> = vec![create_aggregate_expr( - count_udaf().as_ref(), - &[lit(1)], - &[datafusion_expr::lit(1)], - &[], - &[], - schema.as_ref(), - "1", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) + .schema(Arc::clone(&schema)) + .name("1") + .build()?]; let input_batches = (0..4) .map(|_| { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 959796489c191..ffe558e215839 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -26,16 +26,16 @@ use crate::{ cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, PhysicalSortExpr, RowNumber, }, - udaf, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, + ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, }; use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; -use datafusion_common::{exec_err, Column, DataFusionError, Result, ScalarValue}; -use datafusion_expr::Expr; +use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::{col, Expr, SortExpr}; use datafusion_expr::{ - BuiltInWindowFunction, PartitionEvaluator, SortExpr, WindowFrame, - WindowFunctionDefinition, WindowUDF, + BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, + WindowUDF, }; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ @@ -44,6 +44,7 @@ use datafusion_physical_expr::{ AggregateExpr, ConstExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, }; +use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; use itertools::Itertools; mod bounded_window_agg_exec; @@ -95,7 +96,7 @@ pub fn create_window_expr( fun: &WindowFunctionDefinition, name: String, args: 
&[Arc], - logical_args: &[Expr], + _logical_args: &[Expr], partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -129,7 +130,6 @@ pub fn create_window_expr( )) } WindowFunctionDefinition::AggregateUDF(fun) => { - // TODO: Ordering not supported for Window UDFs yet // Convert `Vec` into `Vec` let sort_exprs = order_by .iter() @@ -137,28 +137,20 @@ pub fn create_window_expr( let field_name = expr.to_string(); let field_name = field_name.split('@').next().unwrap_or(&field_name); Expr::Sort(SortExpr { - expr: Box::new(Expr::Column(Column::new( - None::, - field_name, - ))), + expr: Box::new(col(field_name)), asc: !options.descending, nulls_first: options.nulls_first, }) }) .collect::>(); - let aggregate = udaf::create_aggregate_expr( - fun.as_ref(), - args, - logical_args, - &sort_exprs, - order_by, - input_schema, - name, - ignore_nulls, - false, - false, - )?; + let aggregate = AggregateExprBuilder::new(Arc::clone(fun), args.to_vec()) + .schema(Arc::new(input_schema.clone())) + .name(name) + .order_by(order_by.to_vec()) + .sort_exprs(sort_exprs) + .with_ignore_nulls(ignore_nulls) + .build()?; window_expr_from_aggregate_expr( partition_by, order_by, @@ -166,6 +158,7 @@ pub fn create_window_expr( aggregate, ) } + // TODO: Ordering not supported for Window UDFs yet WindowFunctionDefinition::WindowUDF(fun) => Arc::new(BuiltInWindowExpr::new( create_udwf_window_expr(fun, args, input_schema, name)?, partition_by, diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 8c9e5bbd0e959..5c4d41f0eca61 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -18,6 +18,7 @@ use std::fmt::Debug; use std::sync::Arc; +use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; use prost::bytes::BufMut; use prost::Message; @@ -58,7 +59,7 @@ use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMerge use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use datafusion::physical_plan::{ - udaf, AggregateExpr, ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, + AggregateExpr, ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF}; @@ -501,13 +502,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { None => registry.udaf(udaf_name)? }; - // TODO: 'logical_exprs' is not supported for UDAF yet. - // approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. - let logical_exprs = &[]; + // TODO: approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. 
// TODO: `order by` is not supported for UDAF yet - let sort_exprs = &[]; - let ordering_req = &[]; - udaf::create_aggregate_expr(agg_udf.as_ref(), &input_phy_expr, logical_exprs, sort_exprs, ordering_req, &physical_schema, name, agg_node.ignore_nulls, agg_node.distinct, false) + AggregateExprBuilder::new(agg_udf, input_phy_expr).schema(Arc::clone(&physical_schema)).name(name).with_ignore_nulls(agg_node.ignore_nulls).with_distinct(agg_node.distinct).build() } } }).transpose()?.ok_or_else(|| { diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 31ed0837d2f5b..3ddc122e3de2a 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -24,6 +24,7 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; +use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; use prost::Message; use datafusion::arrow::array::ArrayRef; @@ -64,7 +65,6 @@ use datafusion::physical_plan::placeholder_row::PlaceholderRowExec; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::repartition::RepartitionExec; use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::udaf::create_aggregate_expr; use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowAggExec, @@ -86,7 +86,7 @@ use datafusion_expr::{ }; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::nth_value::nth_value_udaf; -use datafusion_functions_aggregate::string_agg::StringAgg; +use datafusion_functions_aggregate::string_agg::string_agg_udaf; use datafusion_proto::physical_plan::{ AsExecutionPlan, DefaultPhysicalExtensionCodec, PhysicalExtensionCodec, }; @@ -291,18 +291,13 @@ fn roundtrip_window() -> Result<()> { )); let plain_aggr_window_expr = Arc::new(PlainAggregateWindowExpr::new( - create_aggregate_expr( - &avg_udaf(), - &[cast(col("b", &schema)?, &schema, DataType::Float64)?], - &[], - &[], - &[], - &schema, - "avg(b)", - false, - false, - false, - )?, + AggregateExprBuilder::new( + avg_udaf(), + vec![cast(col("b", &schema)?, &schema, DataType::Float64)?], + ) + .schema(Arc::clone(&schema)) + .name("avg(b)") + .build()?, &[], &[], Arc::new(WindowFrame::new(None)), @@ -315,18 +310,10 @@ fn roundtrip_window() -> Result<()> { ); let args = vec![cast(col("a", &schema)?, &schema, DataType::Float64)?]; - let sum_expr = create_aggregate_expr( - &sum_udaf(), - &args, - &[], - &[], - &[], - &schema, - "SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING", - false, - false, - false, - )?; + let sum_expr = AggregateExprBuilder::new(sum_udaf(), args) + .schema(Arc::clone(&schema)) + .name("SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING") + .build()?; let sliding_aggr_window_expr = Arc::new(SlidingAggregateWindowExpr::new( sum_expr, @@ -357,49 +344,28 @@ fn rountrip_aggregate() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; + let avg_expr = AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .name("AVG(b)") + .build()?; + let nth_expr = + AggregateExprBuilder::new(nth_value_udaf(), vec![col("b", &schema)?, lit(1u64)]) + .schema(Arc::clone(&schema)) + .name("NTH_VALUE(b, 1)") + .build()?; + let str_agg_expr = + AggregateExprBuilder::new(string_agg_udaf(), 
vec![col("b", &schema)?, lit(1u64)]) + .schema(Arc::clone(&schema)) + .name("NTH_VALUE(b, 1)") + .build()?; + let test_cases: Vec>> = vec![ // AVG - vec![create_aggregate_expr( - &avg_udaf(), - &[col("b", &schema)?], - &[], - &[], - &[], - &schema, - "AVG(b)", - false, - false, - false, - )?], + vec![avg_expr], // NTH_VALUE - vec![create_aggregate_expr( - &nth_value_udaf(), - &[col("b", &schema)?, lit(1u64)], - &[], - &[], - &[], - &schema, - "NTH_VALUE(b, 1)", - false, - false, - false, - )?], + vec![nth_expr], // STRING_AGG - vec![create_aggregate_expr( - &AggregateUDF::new_from_impl(StringAgg::new()), - &[ - cast(col("b", &schema)?, &schema, DataType::Utf8)?, - lit(ScalarValue::Utf8(Some(",".to_string()))), - ], - &[], - &[], - &[], - &schema, - "STRING_AGG(name, ',')", - false, - false, - false, - )?], + vec![str_agg_expr], ]; for aggregates in test_cases { @@ -426,18 +392,13 @@ fn rountrip_aggregate_with_limit() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = vec![create_aggregate_expr( - &avg_udaf(), - &[col("b", &schema)?], - &[], - &[], - &[], - &schema, - "AVG(b)", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .name("AVG(b)") + .build()?, + ]; let agg = AggregateExec::try_new( AggregateMode::Final, @@ -498,18 +459,13 @@ fn roundtrip_aggregate_udaf() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = vec![create_aggregate_expr( - &udaf, - &[col("b", &schema)?], - &[], - &[], - &[], - &schema, - "example_agg", - false, - false, - false, - )?]; + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(Arc::new(udaf), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .name("example_agg") + .build()?, + ]; roundtrip_test_with_context( Arc::new(AggregateExec::try_new( @@ -994,21 +950,16 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { DataType::Int64, )); - let udaf = AggregateUDF::from(MyAggregateUDF::new("result".to_string())); - let aggr_args: [Arc; 1] = - [Arc::new(Literal::new(ScalarValue::from(42)))]; - let aggr_expr = create_aggregate_expr( - &udaf, - &aggr_args, - &[], - &[], - &[], - &schema, - "aggregate_udf", - false, - false, - false, - )?; + let udaf = Arc::new(AggregateUDF::from(MyAggregateUDF::new( + "result".to_string(), + ))); + let aggr_args: Vec> = + vec![Arc::new(Literal::new(ScalarValue::from(42)))]; + + let aggr_expr = AggregateExprBuilder::new(Arc::clone(&udaf), aggr_args.clone()) + .schema(Arc::clone(&schema)) + .name("aggregate_udf") + .build()?; let filter = Arc::new(FilterExec::try_new( Arc::new(BinaryExpr::new( @@ -1030,18 +981,12 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { vec![col("author", &schema)?], )?); - let aggr_expr = create_aggregate_expr( - &udaf, - &aggr_args, - &[], - &[], - &[], - &schema, - "aggregate_udf", - true, - true, - false, - )?; + let aggr_expr = AggregateExprBuilder::new(udaf, aggr_args.clone()) + .schema(Arc::clone(&schema)) + .name("aggregate_udf") + .distinct() + .ignore_nulls() + .build()?; let aggregate = Arc::new(AggregateExec::try_new( AggregateMode::Final, From 5901df58b21b8b4e36011744e7ddc17bcb6a37b3 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Wed, 24 Jul 2024 12:21:13 -0700 Subject: [PATCH 137/357] feat: add bounds for unary math scalar functions (#11584) * feat: unary udf function bounds * feat: add bounds 
for more types * feat: remove eprint * fix: add missing bounds file * tests: add tests for unary udf bounds * tests: test f32 and f64 * build: remove unrelated changes * refactor: better unbounded func name * tests: fix tests * refactor: use data_type method * refactor: add more useful intervals to Interval * refactor: use typed bounds for (-inf, inf) * refactor: inf to unbounded * refactor: add lower/upper pi bounds * refactor: consts to consts module * fix: add missing file * fix: docstring typo * refactor: remove unused signum bounds --- datafusion/common/src/scalar/consts.rs | 44 +++ datafusion/common/src/scalar/mod.rs | 119 +++++++ datafusion/expr/src/interval_arithmetic.rs | 32 ++ datafusion/functions/src/macros.rs | 7 +- datafusion/functions/src/math/bounds.rs | 108 +++++++ datafusion/functions/src/math/mod.rs | 302 ++++++++++++++++-- datafusion/functions/src/math/monotonicity.rs | 17 +- 7 files changed, 595 insertions(+), 34 deletions(-) create mode 100644 datafusion/common/src/scalar/consts.rs create mode 100644 datafusion/functions/src/math/bounds.rs diff --git a/datafusion/common/src/scalar/consts.rs b/datafusion/common/src/scalar/consts.rs new file mode 100644 index 0000000000000..efcde651841b0 --- /dev/null +++ b/datafusion/common/src/scalar/consts.rs @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Constants defined for scalar construction. + +// PI ~ 3.1415927 in f32 +#[allow(clippy::approx_constant)] +pub(super) const PI_UPPER_F32: f32 = 3.141593_f32; + +// PI ~ 3.141592653589793 in f64 +pub(super) const PI_UPPER_F64: f64 = 3.141592653589794_f64; + +// -PI ~ -3.1415927 in f32 +#[allow(clippy::approx_constant)] +pub(super) const NEGATIVE_PI_LOWER_F32: f32 = -3.141593_f32; + +// -PI ~ -3.141592653589793 in f64 +pub(super) const NEGATIVE_PI_LOWER_F64: f64 = -3.141592653589794_f64; + +// PI / 2 ~ 1.5707964 in f32 +pub(super) const FRAC_PI_2_UPPER_F32: f32 = 1.5707965_f32; + +// PI / 2 ~ 1.5707963267948966 in f64 +pub(super) const FRAC_PI_2_UPPER_F64: f64 = 1.5707963267948967_f64; + +// -PI / 2 ~ -1.5707964 in f32 +pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F32: f32 = -1.5707965_f32; + +// -PI / 2 ~ -1.5707963267948966 in f64 +pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F64: f64 = -1.5707963267948967_f64; diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 92ed897e7185c..286df339adcf4 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -17,7 +17,9 @@ //! 
[`ScalarValue`]: stores single values +mod consts; mod struct_builder; + use std::borrow::Borrow; use std::cmp::Ordering; use std::collections::{HashSet, VecDeque}; @@ -1007,6 +1009,123 @@ impl ScalarValue { } } + /// Returns a [`ScalarValue`] representing PI + pub fn new_pi(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)), + DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)), + _ => _internal_err!("PI is not supported for data type: {:?}", datatype), + } + } + + /// Returns a [`ScalarValue`] representing PI's upper bound + pub fn new_pi_upper(datatype: &DataType) -> Result { + // TODO: replace the constants with next_up/next_down when + // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up + match datatype { + DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)), + DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)), + _ => { + _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype) + } + } + } + + /// Returns a [`ScalarValue`] representing -PI's lower bound + pub fn new_negative_pi_lower(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)), + DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)), + _ => { + _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype) + } + } + } + + /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound + pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)), + DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)), + _ => { + _internal_err!( + "PI_UPPER/2 is not supported for data type: {:?}", + datatype + ) + } + } + } + + // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound + pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => { + Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32)) + } + DataType::Float64 => { + Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64)) + } + _ => { + _internal_err!( + "-PI/2_LOWER is not supported for data type: {:?}", + datatype + ) + } + } + } + + /// Returns a [`ScalarValue`] representing -PI + pub fn new_negative_pi(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)), + DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)), + _ => _internal_err!("-PI is not supported for data type: {:?}", datatype), + } + } + + /// Returns a [`ScalarValue`] representing PI/2 + pub fn new_frac_pi_2(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)), + DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)), + _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype), + } + } + + /// Returns a [`ScalarValue`] representing -PI/2 + pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)), + DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)), + _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype), + } + } + + /// Returns a [`ScalarValue`] representing infinity + pub fn new_infinity(datatype: &DataType) -> Result { + match datatype { + 
DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)), + DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)), + _ => { + _internal_err!("Infinity is not supported for data type: {:?}", datatype) + } + } + } + + /// Returns a [`ScalarValue`] representing negative infinity + pub fn new_neg_infinity(datatype: &DataType) -> Result { + match datatype { + DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)), + DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)), + _ => { + _internal_err!( + "Negative Infinity is not supported for data type: {:?}", + datatype + ) + } + } + } + /// Create a zero value in the given type. pub fn new_zero(datatype: &DataType) -> Result { Ok(match datatype { diff --git a/datafusion/expr/src/interval_arithmetic.rs b/datafusion/expr/src/interval_arithmetic.rs index 18f92334ff14d..d0dd418c78e7e 100644 --- a/datafusion/expr/src/interval_arithmetic.rs +++ b/datafusion/expr/src/interval_arithmetic.rs @@ -332,6 +332,38 @@ impl Interval { Ok(Self::new(unbounded_endpoint.clone(), unbounded_endpoint)) } + /// Creates an interval between -1 to 1. + pub fn make_symmetric_unit_interval(data_type: &DataType) -> Result { + Self::try_new( + ScalarValue::new_negative_one(data_type)?, + ScalarValue::new_one(data_type)?, + ) + } + + /// Create an interval from -π to π. + pub fn make_symmetric_pi_interval(data_type: &DataType) -> Result { + Self::try_new( + ScalarValue::new_negative_pi_lower(data_type)?, + ScalarValue::new_pi_upper(data_type)?, + ) + } + + /// Create an interval from -π/2 to π/2. + pub fn make_symmetric_half_pi_interval(data_type: &DataType) -> Result { + Self::try_new( + ScalarValue::new_neg_frac_pi_2_lower(data_type)?, + ScalarValue::new_frac_pi_2_upper(data_type)?, + ) + } + + /// Create an interval from 0 to infinity. + pub fn make_non_negative_infinity_interval(data_type: &DataType) -> Result { + Self::try_new( + ScalarValue::new_zero(data_type)?, + ScalarValue::try_from(data_type)?, + ) + } + /// Returns a reference to the lower bound. pub fn lower(&self) -> &ScalarValue { &self.lower diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index cae689b3e0cb7..e26c94e1bb799 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -162,7 +162,7 @@ macro_rules! downcast_arg { /// $UNARY_FUNC: the unary function to apply to the argument /// $OUTPUT_ORDERING: the output ordering calculation method of the function macro_rules! make_math_unary_udf { - ($UDF:ident, $GNAME:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr) => { + ($UDF:ident, $GNAME:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr) => { make_udf_function!($NAME::$UDF, $GNAME, $NAME); mod $NAME { @@ -172,6 +172,7 @@ macro_rules! make_math_unary_udf { use arrow::array::{ArrayRef, Float32Array, Float64Array}; use arrow::datatypes::DataType; use datafusion_common::{exec_err, DataFusionError, Result}; + use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -222,6 +223,10 @@ macro_rules! 
make_math_unary_udf { $OUTPUT_ORDERING(input) } + fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result { + $EVALUATE_BOUNDS(inputs) + } + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; diff --git a/datafusion/functions/src/math/bounds.rs b/datafusion/functions/src/math/bounds.rs new file mode 100644 index 0000000000000..894d2bded5ebe --- /dev/null +++ b/datafusion/functions/src/math/bounds.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::ScalarValue; +use datafusion_expr::interval_arithmetic::Interval; + +pub(super) fn unbounded_bounds(input: &[&Interval]) -> crate::Result { + let data_type = input[0].data_type(); + + Interval::make_unbounded(&data_type) +} + +pub(super) fn sin_bounds(input: &[&Interval]) -> crate::Result { + // sin(x) is bounded by [-1, 1] + let data_type = input[0].data_type(); + + Interval::make_symmetric_unit_interval(&data_type) +} + +pub(super) fn asin_bounds(input: &[&Interval]) -> crate::Result { + // asin(x) is bounded by [-π/2, π/2] + let data_type = input[0].data_type(); + + Interval::make_symmetric_half_pi_interval(&data_type) +} + +pub(super) fn atan_bounds(input: &[&Interval]) -> crate::Result { + // atan(x) is bounded by [-π/2, π/2] + let data_type = input[0].data_type(); + + Interval::make_symmetric_half_pi_interval(&data_type) +} + +pub(super) fn acos_bounds(input: &[&Interval]) -> crate::Result { + // acos(x) is bounded by [0, π] + let data_type = input[0].data_type(); + + Interval::try_new( + ScalarValue::new_zero(&data_type)?, + ScalarValue::new_pi_upper(&data_type)?, + ) +} + +pub(super) fn acosh_bounds(input: &[&Interval]) -> crate::Result { + // acosh(x) is bounded by [0, ∞) + let data_type = input[0].data_type(); + + Interval::make_non_negative_infinity_interval(&data_type) +} + +pub(super) fn cos_bounds(input: &[&Interval]) -> crate::Result { + // cos(x) is bounded by [-1, 1] + let data_type = input[0].data_type(); + + Interval::make_symmetric_unit_interval(&data_type) +} + +pub(super) fn cosh_bounds(input: &[&Interval]) -> crate::Result { + // cosh(x) is bounded by [1, ∞) + let data_type = input[0].data_type(); + + Interval::try_new( + ScalarValue::new_one(&data_type)?, + ScalarValue::try_from(&data_type)?, + ) +} + +pub(super) fn exp_bounds(input: &[&Interval]) -> crate::Result { + // exp(x) is bounded by [0, ∞) + let data_type = input[0].data_type(); + + Interval::make_non_negative_infinity_interval(&data_type) +} + +pub(super) fn radians_bounds(input: &[&Interval]) -> crate::Result { + // radians(x) is bounded by (-π, π) + let data_type = input[0].data_type(); + + Interval::make_symmetric_pi_interval(&data_type) +} + +pub(super) fn sqrt_bounds(input: &[&Interval]) -> crate::Result { + // 
sqrt(x) is bounded by [0, ∞) + let data_type = input[0].data_type(); + + Interval::make_non_negative_infinity_interval(&data_type) +} + +pub(super) fn tanh_bounds(input: &[&Interval]) -> crate::Result { + // tanh(x) is bounded by (-1, 1) + let data_type = input[0].data_type(); + + Interval::make_symmetric_unit_interval(&data_type) +} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 3b32a158b8848..1e41fff289a48 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -22,6 +22,7 @@ use datafusion_expr::ScalarUDF; use std::sync::Arc; pub mod abs; +pub mod bounds; pub mod cot; pub mod factorial; pub mod gcd; @@ -40,36 +41,142 @@ pub mod trunc; // Create UDFs make_udf_function!(abs::AbsFunc, ABS, abs); -make_math_unary_udf!(AcosFunc, ACOS, acos, acos, super::acos_order); -make_math_unary_udf!(AcoshFunc, ACOSH, acosh, acosh, super::acosh_order); -make_math_unary_udf!(AsinFunc, ASIN, asin, asin, super::asin_order); -make_math_unary_udf!(AsinhFunc, ASINH, asinh, asinh, super::asinh_order); -make_math_unary_udf!(AtanFunc, ATAN, atan, atan, super::atan_order); -make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, super::atanh_order); +make_math_unary_udf!( + AcosFunc, + ACOS, + acos, + acos, + super::acos_order, + super::bounds::acos_bounds +); +make_math_unary_udf!( + AcoshFunc, + ACOSH, + acosh, + acosh, + super::acosh_order, + super::bounds::acosh_bounds +); +make_math_unary_udf!( + AsinFunc, + ASIN, + asin, + asin, + super::asin_order, + super::bounds::asin_bounds +); +make_math_unary_udf!( + AsinhFunc, + ASINH, + asinh, + asinh, + super::asinh_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + AtanFunc, + ATAN, + atan, + atan, + super::atan_order, + super::bounds::atan_bounds +); +make_math_unary_udf!( + AtanhFunc, + ATANH, + atanh, + atanh, + super::atanh_order, + super::bounds::unbounded_bounds +); make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, super::atan2_order); -make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, super::cbrt_order); -make_math_unary_udf!(CeilFunc, CEIL, ceil, ceil, super::ceil_order); -make_math_unary_udf!(CosFunc, COS, cos, cos, super::cos_order); -make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, super::cosh_order); +make_math_unary_udf!( + CbrtFunc, + CBRT, + cbrt, + cbrt, + super::cbrt_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + CeilFunc, + CEIL, + ceil, + ceil, + super::ceil_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + CosFunc, + COS, + cos, + cos, + super::cos_order, + super::bounds::cos_bounds +); +make_math_unary_udf!( + CoshFunc, + COSH, + cosh, + cosh, + super::cosh_order, + super::bounds::cosh_bounds +); make_udf_function!(cot::CotFunc, COT, cot); make_math_unary_udf!( DegreesFunc, DEGREES, degrees, to_degrees, - super::degrees_order + super::degrees_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + ExpFunc, + EXP, + exp, + exp, + super::exp_order, + super::bounds::exp_bounds ); -make_math_unary_udf!(ExpFunc, EXP, exp, exp, super::exp_order); make_udf_function!(factorial::FactorialFunc, FACTORIAL, factorial); -make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, super::floor_order); +make_math_unary_udf!( + FloorFunc, + FLOOR, + floor, + floor, + super::floor_order, + super::bounds::unbounded_bounds +); make_udf_function!(log::LogFunc, LOG, log); make_udf_function!(gcd::GcdFunc, GCD, gcd); make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(iszero::IsZeroFunc, ISZERO, 
iszero); make_udf_function!(lcm::LcmFunc, LCM, lcm); -make_math_unary_udf!(LnFunc, LN, ln, ln, super::ln_order); -make_math_unary_udf!(Log2Func, LOG2, log2, log2, super::log2_order); -make_math_unary_udf!(Log10Func, LOG10, log10, log10, super::log10_order); +make_math_unary_udf!( + LnFunc, + LN, + ln, + ln, + super::ln_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + Log2Func, + LOG2, + log2, + log2, + super::log2_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + Log10Func, + LOG10, + log10, + log10, + super::log10_order, + super::bounds::unbounded_bounds +); make_udf_function!(nanvl::NanvlFunc, NANVL, nanvl); make_udf_function!(pi::PiFunc, PI, pi); make_udf_function!(power::PowerFunc, POWER, power); @@ -78,16 +185,52 @@ make_math_unary_udf!( RADIANS, radians, to_radians, - super::radians_order + super::radians_order, + super::bounds::radians_bounds ); make_udf_function!(random::RandomFunc, RANDOM, random); make_udf_function!(round::RoundFunc, ROUND, round); make_udf_function!(signum::SignumFunc, SIGNUM, signum); -make_math_unary_udf!(SinFunc, SIN, sin, sin, super::sin_order); -make_math_unary_udf!(SinhFunc, SINH, sinh, sinh, super::sinh_order); -make_math_unary_udf!(SqrtFunc, SQRT, sqrt, sqrt, super::sqrt_order); -make_math_unary_udf!(TanFunc, TAN, tan, tan, super::tan_order); -make_math_unary_udf!(TanhFunc, TANH, tanh, tanh, super::tanh_order); +make_math_unary_udf!( + SinFunc, + SIN, + sin, + sin, + super::sin_order, + super::bounds::sin_bounds +); +make_math_unary_udf!( + SinhFunc, + SINH, + sinh, + sinh, + super::sinh_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + SqrtFunc, + SQRT, + sqrt, + sqrt, + super::sqrt_order, + super::bounds::sqrt_bounds +); +make_math_unary_udf!( + TanFunc, + TAN, + tan, + tan, + super::tan_order, + super::bounds::unbounded_bounds +); +make_math_unary_udf!( + TanhFunc, + TANH, + tanh, + tanh, + super::tanh_order, + super::bounds::tanh_bounds +); make_udf_function!(trunc::TruncFunc, TRUNC, trunc); pub mod expr_fn { @@ -175,3 +318,118 @@ pub fn functions() -> Vec> { trunc(), ] } + +#[cfg(test)] +mod tests { + use arrow::datatypes::DataType; + use datafusion_common::ScalarValue; + use datafusion_expr::interval_arithmetic::Interval; + + fn unbounded_interval(data_type: &DataType) -> Interval { + Interval::make_unbounded(data_type).unwrap() + } + + fn one_to_inf_interval(data_type: &DataType) -> Interval { + Interval::try_new( + ScalarValue::new_one(data_type).unwrap(), + ScalarValue::try_from(data_type).unwrap(), + ) + .unwrap() + } + + fn zero_to_pi_interval(data_type: &DataType) -> Interval { + Interval::try_new( + ScalarValue::new_zero(data_type).unwrap(), + ScalarValue::new_pi_upper(data_type).unwrap(), + ) + .unwrap() + } + + fn assert_udf_evaluates_to_bounds( + udf: &datafusion_expr::ScalarUDF, + interval: Interval, + expected: Interval, + ) { + let input = vec![&interval]; + let result = udf.evaluate_bounds(&input).unwrap(); + assert_eq!( + result, + expected, + "Bounds check failed on UDF: {:?}", + udf.name() + ); + } + + #[test] + fn test_cases() -> crate::Result<()> { + let datatypes = [DataType::Float32, DataType::Float64]; + let cases = datatypes + .iter() + .flat_map(|data_type| { + vec![ + ( + super::acos(), + unbounded_interval(data_type), + zero_to_pi_interval(data_type), + ), + ( + super::acosh(), + unbounded_interval(data_type), + Interval::make_non_negative_infinity_interval(data_type).unwrap(), + ), + ( + super::asin(), + unbounded_interval(data_type), + 
Interval::make_symmetric_half_pi_interval(data_type).unwrap(), + ), + ( + super::atan(), + unbounded_interval(data_type), + Interval::make_symmetric_half_pi_interval(data_type).unwrap(), + ), + ( + super::cos(), + unbounded_interval(data_type), + Interval::make_symmetric_unit_interval(data_type).unwrap(), + ), + ( + super::cosh(), + unbounded_interval(data_type), + one_to_inf_interval(data_type), + ), + ( + super::sin(), + unbounded_interval(data_type), + Interval::make_symmetric_unit_interval(data_type).unwrap(), + ), + ( + super::exp(), + unbounded_interval(data_type), + Interval::make_non_negative_infinity_interval(data_type).unwrap(), + ), + ( + super::sqrt(), + unbounded_interval(data_type), + Interval::make_non_negative_infinity_interval(data_type).unwrap(), + ), + ( + super::radians(), + unbounded_interval(data_type), + Interval::make_symmetric_pi_interval(data_type).unwrap(), + ), + ( + super::sqrt(), + unbounded_interval(data_type), + Interval::make_non_negative_infinity_interval(data_type).unwrap(), + ), + ] + }) + .collect::>(); + + for (udf, interval, expected) in cases { + assert_udf_evaluates_to_bounds(&udf, interval, expected); + } + + Ok(()) + } +} diff --git a/datafusion/functions/src/math/monotonicity.rs b/datafusion/functions/src/math/monotonicity.rs index 33c061ee11d0d..52f2ec5171982 100644 --- a/datafusion/functions/src/math/monotonicity.rs +++ b/datafusion/functions/src/math/monotonicity.rs @@ -15,24 +15,17 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -fn symmetric_unit_interval(data_type: &DataType) -> Result { - Interval::try_new( - ScalarValue::new_negative_one(data_type)?, - ScalarValue::new_one(data_type)?, - ) -} - /// Non-increasing on the interval \[−1, 1\], undefined otherwise. pub fn acos_order(input: &[ExprProperties]) -> Result { let arg = &input[0]; let range = &arg.range; - let valid_domain = symmetric_unit_interval(&range.lower().data_type())?; + let valid_domain = + Interval::make_symmetric_unit_interval(&range.lower().data_type())?; if valid_domain.contains(range)? == Interval::CERTAINLY_TRUE { Ok(-arg.sort_properties) @@ -63,7 +56,8 @@ pub fn asin_order(input: &[ExprProperties]) -> Result { let arg = &input[0]; let range = &arg.range; - let valid_domain = symmetric_unit_interval(&range.lower().data_type())?; + let valid_domain = + Interval::make_symmetric_unit_interval(&range.lower().data_type())?; if valid_domain.contains(range)? == Interval::CERTAINLY_TRUE { Ok(arg.sort_properties) @@ -87,7 +81,8 @@ pub fn atanh_order(input: &[ExprProperties]) -> Result { let arg = &input[0]; let range = &arg.range; - let valid_domain = symmetric_unit_interval(&range.lower().data_type())?; + let valid_domain = + Interval::make_symmetric_unit_interval(&range.lower().data_type())?; if valid_domain.contains(range)? 
== Interval::CERTAINLY_TRUE { Ok(arg.sort_properties) From bcf715c892f74d48bdbef54ac7165358be6fb741 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 24 Jul 2024 15:22:02 -0400 Subject: [PATCH 138/357] Minor: avoid copying order by exprs in planner (#11634) --- datafusion/sql/src/expr/function.rs | 6 +++--- datafusion/sql/src/expr/order_by.rs | 10 ++++------ datafusion/sql/src/query.rs | 2 +- datafusion/sql/src/select.rs | 2 +- datafusion/sql/src/statement.rs | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 4804752d8389f..0c4b125e76d0a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -274,7 +274,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context)) .collect::>>()?; let mut order_by = self.order_by_to_sort_expr( - &window.order_by, + window.order_by, schema, planner_context, // Numeric literals in window function ORDER BY are treated as constants @@ -350,7 +350,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function if let Some(fm) = self.context_provider.get_aggregate_meta(&name) { let order_by = self.order_by_to_sort_expr( - &order_by, + order_by, schema, planner_context, true, @@ -375,7 +375,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // next, aggregate built-ins if let Ok(fun) = AggregateFunction::from_str(&name) { let order_by = self.order_by_to_sort_expr( - &order_by, + order_by, schema, planner_context, true, diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 4dd81517e958b..6010da6fd325a 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -37,7 +37,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// If false, interpret numeric literals as constant values. pub(crate) fn order_by_to_sort_expr( &self, - exprs: &[OrderByExpr], + exprs: Vec, input_schema: &DFSchema, planner_context: &mut PlannerContext, literal_to_column: bool, @@ -87,11 +87,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input_schema.qualified_field(field_index - 1), )) } - e => self.sql_expr_to_logical_expr( - e.clone(), - order_by_schema, - planner_context, - )?, + e => { + self.sql_expr_to_logical_expr(e, order_by_schema, planner_context)? + } }; let asc = asc.unwrap_or(true); expr_vec.push(Expr::Sort(Sort::new( diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index cbbff19321d81..00560b5c93089 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -59,7 +59,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { other => { let plan = self.set_expr_to_plan(other, planner_context)?; let order_by_rex = self.order_by_to_sort_expr( - &query.order_by, + query.order_by, plan.schema(), planner_context, true, diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index fc46c3a841b53..9b105117af157 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -101,7 +101,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Order-by expressions prioritize referencing columns from the select list, // then from the FROM clause. 
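// [Editorial sketch, not part of the patch] The change in this commit passes the
// ORDER BY expressions by value (`Vec<OrderByExpr>`) instead of by slice
// (`&[OrderByExpr]`), so the planner can consume each expression instead of
// cloning it. A minimal illustration of that ownership pattern is below; the
// `String` element type and function names are hypothetical stand-ins, not the
// real planner API.
fn plan_order_by_borrowed(exprs: &[String]) -> Vec<String> {
    // A borrowed slice forces a clone of every element before it can be
    // converted or moved downstream.
    exprs.iter().map(|e| e.clone()).collect()
}

fn plan_order_by_owned(exprs: Vec<String>) -> Vec<String> {
    // Taking ownership lets each element be moved out with no extra copies.
    exprs.into_iter().collect()
}
// Usage: callers that already own the Vec (as the SQL planner does here) simply
// hand it over, e.g. `plan_order_by_owned(vec!["a".to_string()])`.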
let order_by_rex = self.order_by_to_sort_expr( - &order_by, + order_by, projected_plan.schema().as_ref(), planner_context, true, diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 8eb4113f80a6c..67107bae0202b 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -967,7 +967,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { for expr in order_exprs { // Convert each OrderByExpr to a SortExpr: let expr_vec = - self.order_by_to_sort_expr(&expr, schema, planner_context, true, None)?; + self.order_by_to_sort_expr(expr, schema, planner_context, true, None)?; // Verify that columns of all SortExprs exist in the schema: for expr in expr_vec.iter() { for column in expr.column_refs().iter() { From 20b298e9d82e483e28087e595c409a8cc04872f3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 24 Jul 2024 15:34:31 -0600 Subject: [PATCH 139/357] perf: Optimize IsNotNullExpr (#11586) * add criterion benchmarks for IsNullExpr and IsNotNullExpr * Improve IsNotNull performance by avoiding calling is_null then not and just calling is_not_null kernel directly * fast path if input array is all nulls or no nulls * revert experimental change * remove unused import * simplify PR --- datafusion/physical-expr/Cargo.toml | 4 + datafusion/physical-expr/benches/is_null.rs | 95 +++++++++++++++++++ .../src/expressions/is_not_null.rs | 4 +- .../physical-expr/src/expressions/is_null.rs | 10 ++ 4 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 datafusion/physical-expr/benches/is_null.rs diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 067617a697a98..8436b5279bd76 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -81,3 +81,7 @@ name = "in_list" [[bench]] harness = false name = "case_when" + +[[bench]] +harness = false +name = "is_null" diff --git a/datafusion/physical-expr/benches/is_null.rs b/datafusion/physical-expr/benches/is_null.rs new file mode 100644 index 0000000000000..3dad8e9b456a0 --- /dev/null +++ b/datafusion/physical-expr/benches/is_null.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
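// [Editorial sketch, not part of the patch] The optimization in this commit,
// shown at the arrow kernel level: IsNotNullExpr previously computed
// `NOT(is_null(x))`, which materializes an intermediate BooleanArray, whereas
// calling the dedicated `is_not_null` kernel produces the result in a single
// pass. The input array below is illustrative only; the real code still routes
// union arrays through the `not(is_null(..))` path as a workaround.
fn is_not_null_two_ways() -> Result<(), arrow::error::ArrowError> {
    use arrow::array::{BooleanArray, Int32Array};
    use arrow::compute::{is_not_null, is_null, not};

    let input = Int32Array::from(vec![Some(1), None, Some(3)]);

    // Old shape: two kernels and an intermediate boolean array.
    let via_not: BooleanArray = not(&is_null(&input)?)?;

    // New shape: one kernel, no intermediate allocation.
    let direct: BooleanArray = is_not_null(&input)?;

    assert_eq!(via_not, direct);
    Ok(())
}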
+ +use arrow::datatypes::{Field, Schema}; +use arrow::record_batch::RecordBatch; +use arrow_array::builder::Int32Builder; +use arrow_schema::DataType; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_physical_expr::expressions::{IsNotNullExpr, IsNullExpr}; +use datafusion_physical_expr_common::expressions::column::Column; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + // create input data + let mut c1 = Int32Builder::new(); + let mut c2 = Int32Builder::new(); + let mut c3 = Int32Builder::new(); + for i in 0..1000 { + // c1 is always null + c1.append_null(); + // c2 is never null + c2.append_value(i); + // c3 is a mix of values and nulls + if i % 7 == 0 { + c3.append_null(); + } else { + c3.append_value(i); + } + } + let c1 = Arc::new(c1.finish()); + let c2 = Arc::new(c2.finish()); + let c3 = Arc::new(c3.finish()); + let schema = Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, false), + Field::new("c3", DataType::Int32, true), + ]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2, c3]).unwrap(); + + c.bench_function("is_null: column is all nulls", |b| { + let expr = is_null("c1", 0); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + c.bench_function("is_null: column is never null", |b| { + let expr = is_null("c2", 1); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + c.bench_function("is_null: column is mix of values and nulls", |b| { + let expr = is_null("c3", 2); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + c.bench_function("is_not_null: column is all nulls", |b| { + let expr = is_not_null("c1", 0); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + c.bench_function("is_not_null: column is never null", |b| { + let expr = is_not_null("c2", 1); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); + + c.bench_function("is_not_null: column is mix of values and nulls", |b| { + let expr = is_not_null("c3", 2); + b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + }); +} + +fn is_null(name: &str, index: usize) -> Arc { + Arc::new(IsNullExpr::new(Arc::new(Column::new(name, index)))) +} + +fn is_not_null(name: &str, index: usize) -> Arc { + Arc::new(IsNotNullExpr::new(Arc::new(Column::new(name, index)))) +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs b/datafusion/physical-expr/src/expressions/is_not_null.rs index 9f7438d13e051..58559352d44c0 100644 --- a/datafusion/physical-expr/src/expressions/is_not_null.rs +++ b/datafusion/physical-expr/src/expressions/is_not_null.rs @@ -22,7 +22,6 @@ use std::{any::Any, sync::Arc}; use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; -use arrow::compute; use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, @@ -74,8 +73,7 @@ impl PhysicalExpr for IsNotNullExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - let is_null = super::is_null::compute_is_null(array)?; - let is_not_null = compute::not(&is_null)?; + let is_not_null = super::is_null::compute_is_not_null(array)?; Ok(ColumnarValue::Array(Arc::new(is_not_null))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( diff --git a/datafusion/physical-expr/src/expressions/is_null.rs 
b/datafusion/physical-expr/src/expressions/is_null.rs index e2dc941e26bce..3cdb49bcab42f 100644 --- a/datafusion/physical-expr/src/expressions/is_null.rs +++ b/datafusion/physical-expr/src/expressions/is_null.rs @@ -117,6 +117,16 @@ pub(crate) fn compute_is_null(array: ArrayRef) -> Result { } } +/// workaround , +/// this can be replaced with a direct call to `arrow::compute::is_not_null` once it's fixed. +pub(crate) fn compute_is_not_null(array: ArrayRef) -> Result { + if array.as_any().is::() { + compute::not(&compute_is_null(array)?).map_err(Into::into) + } else { + compute::is_not_null(array.as_ref()).map_err(Into::into) + } +} + fn dense_union_is_null( union_array: &UnionArray, offsets: &ScalarBuffer, From 76039fadd934a9f0798fee160877a4247c71c352 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 25 Jul 2024 01:30:55 +0200 Subject: [PATCH 140/357] Unify CI and pre-commit hook settings for clippy (#11640) pre-commit hook runs clippy and CI scripts run clippy too. The commands are and should be the same. Let's define them once. --- pre-commit.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pre-commit.sh b/pre-commit.sh index 09cf431a1409f..c81dd9f8e5e83 100755 --- a/pre-commit.sh +++ b/pre-commit.sh @@ -57,13 +57,7 @@ fi # 1. cargo clippy echo -e "$(GREEN INFO): cargo clippy ..." - -# Cargo clippy always return exit code 0, and `tee` doesn't work. -# So let's just run cargo clippy. -cargo clippy --all-targets --workspace --features avro,pyarrow -- -D warnings -pushd datafusion-cli -cargo clippy --all-targets --all-features -- -D warnings -popd +./ci/scripts/rust_clippy.sh echo -e "$(GREEN INFO): cargo clippy done" # 2. cargo fmt: format with nightly and stable. From 886e8accdaa85d7b3dca45340b955437786a9b6a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 24 Jul 2024 19:54:57 -0400 Subject: [PATCH 141/357] Consistent API to set parameters of aggregate and window functions (`AggregateExt` --> `ExprFunctionExt`) (#11550) * Moving over AggregateExt to ExprFunctionExt and adding in function settings for window functions * Switch WindowFrame to only need the window function definition and arguments. 
Other parameters will be set via the ExprFuncBuilder * Changing null_treatment to take an option, but this is mostly for code cleanliness and not strictly required * Moving functions in ExprFuncBuilder over to be explicitly implementing ExprFunctionExt trait so we can guarantee a consistent user experience no matter which they call on the Expr and which on the builder * Apply cargo fmt * Add deprecated trait AggregateExt so that users get a warning but still builds * Window helper functions should return Expr * Update documentation to show window function example * Add license info * Update comments that are no longer applicable * Remove first_value and last_value since these are already implemented in the aggregate functions * Update to use WindowFunction::new to set additional parameters for order_by using ExprFunctionExt * Apply cargo fmt * Fix up clippy * fix doc example * fmt * doc tweaks * more doc tweaks * fix up links * fix integration test * fix anothr doc example --------- Co-authored-by: Tim Saucer Co-authored-by: Andrew Lamb --- datafusion-examples/examples/advanced_udwf.rs | 12 +- datafusion-examples/examples/expr_api.rs | 4 +- datafusion-examples/examples/simple_udwf.rs | 12 +- datafusion/core/src/dataframe/mod.rs | 13 +- datafusion/core/tests/dataframe/mod.rs | 22 +- datafusion/core/tests/expr_api/mod.rs | 2 +- datafusion/expr/src/expr.rs | 85 ++++-- datafusion/expr/src/expr_fn.rs | 279 +++++++++++++++++- datafusion/expr/src/lib.rs | 3 +- datafusion/expr/src/tree_node.rs | 17 +- datafusion/expr/src/udaf.rs | 177 +---------- datafusion/expr/src/udwf.rs | 47 ++- datafusion/expr/src/utils.rs | 89 +++--- datafusion/expr/src/window_function.rs | 99 +++++++ .../functions-aggregate/src/first_last.rs | 4 +- .../src/analyzer/count_wildcard_rule.rs | 18 +- .../optimizer/src/analyzer/type_coercion.rs | 21 +- .../optimizer/src/optimize_projections/mod.rs | 17 +- .../src/replace_distinct_aggregate.rs | 2 +- .../simplify_expressions/expr_simplifier.rs | 24 +- .../src/single_distinct_to_groupby.rs | 2 +- .../proto/src/logical_plan/from_proto.rs | 46 +-- .../tests/cases/roundtrip_logical_plan.rs | 77 ++--- datafusion/sql/src/expr/function.rs | 25 +- datafusion/sql/src/unparser/expr.rs | 2 +- docs/source/user-guide/expressions.md | 2 +- 26 files changed, 657 insertions(+), 444 deletions(-) create mode 100644 datafusion/expr/src/window_function.rs diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 11fb6f6ccc480..ec0318a561b97 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -216,12 +216,12 @@ async fn main() -> Result<()> { df.show().await?; // Now, run the function using the DataFrame API: - let window_expr = smooth_it.call( - vec![col("speed")], // smooth_it(speed) - vec![col("car")], // PARTITION BY car - vec![col("time").sort(true, true)], // ORDER BY time ASC - WindowFrame::new(None), - ); + let window_expr = smooth_it + .call(vec![col("speed")]) // smooth_it(speed) + .partition_by(vec![col("car")]) // PARTITION BY car + .order_by(vec![col("time").sort(true, true)]) // ORDER BY time ASC + .window_frame(WindowFrame::new(None)) + .build()?; let df = ctx.table("cars").await?.window(vec![window_expr])?; // print the results diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index a48171c625a81..0eb823302acf6 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -33,7 +33,7 
@@ use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::BinaryExpr; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::{AggregateExt, ColumnarValue, ExprSchemable, Operator}; +use datafusion_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, Operator}; /// This example demonstrates the DataFusion [`Expr`] API. /// @@ -95,7 +95,7 @@ fn expr_fn_demo() -> Result<()> { let agg = first_value.call(vec![col("price")]); assert_eq!(agg.to_string(), "first_value(price)"); - // You can use the AggregateExt trait to create more complex aggregates + // You can use the ExprFunctionExt trait to create more complex aggregates // such as `FIRST_VALUE(price FILTER quantity > 100 ORDER BY ts ) let agg = first_value .call(vec![col("price")]) diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/simple_udwf.rs index 563f02cee6a6c..22dfbbbf0c3a5 100644 --- a/datafusion-examples/examples/simple_udwf.rs +++ b/datafusion-examples/examples/simple_udwf.rs @@ -118,12 +118,12 @@ async fn main() -> Result<()> { df.show().await?; // Now, run the function using the DataFrame API: - let window_expr = smooth_it.call( - vec![col("speed")], // smooth_it(speed) - vec![col("car")], // PARTITION BY car - vec![col("time").sort(true, true)], // ORDER BY time ASC - WindowFrame::new(None), - ); + let window_expr = smooth_it + .call(vec![col("speed")]) // smooth_it(speed) + .partition_by(vec![col("car")]) // PARTITION BY car + .order_by(vec![col("time").sort(true, true)]) // ORDER BY time ASC + .window_frame(WindowFrame::new(None)) + .build()?; let df = ctx.table("cars").await?.window(vec![window_expr])?; // print the results diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index fb28b5c1ab470..ea437cc99a336 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1696,8 +1696,8 @@ mod tests { use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::{ - cast, create_udf, expr, lit, BuiltInWindowFunction, ScalarFunctionImplementation, - Volatility, WindowFrame, WindowFunctionDefinition, + cast, create_udf, expr, lit, BuiltInWindowFunction, ExprFunctionExt, + ScalarFunctionImplementation, Volatility, WindowFunctionDefinition, }; use datafusion_functions_aggregate::expr_fn::{array_agg, count_distinct}; use datafusion_physical_expr::expressions::Column; @@ -1867,11 +1867,10 @@ mod tests { BuiltInWindowFunction::FirstValue, ), vec![col("aggregate_test_100.c1")], - vec![col("aggregate_test_100.c2")], - vec![], - WindowFrame::new(None), - None, - )); + )) + .partition_by(vec![col("aggregate_test_100.c2")]) + .build() + .unwrap(); let t2 = t.select(vec![col("c1"), first_row])?; let plan = t2.plan.clone(); diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index bc01ada1e04b8..d83a47ceb0699 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -55,8 +55,8 @@ use datafusion_expr::expr::{GroupingSet, Sort}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, exists, expr, in_subquery, lit, max, out_ref_col, placeholder, - scalar_subquery, when, wildcard, Expr, ExprSchemable, WindowFrame, WindowFrameBound, - WindowFrameUnits, WindowFunctionDefinition, + scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, 
+ WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_functions_aggregate::expr_fn::{array_agg, avg, count, sum}; @@ -183,15 +183,15 @@ async fn test_count_wildcard_on_window() -> Result<()> { .select(vec![Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(count_udaf()), vec![wildcard()], - vec![], - vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))], - WindowFrame::new_bounds( - WindowFrameUnits::Range, - WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), - WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), - ), - None, - ))])? + )) + .order_by(vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))]) + .window_frame(WindowFrame::new_bounds( + WindowFrameUnits::Range, + WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), + WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), + )) + .build() + .unwrap()])? .explain(false, false)? .collect() .await?; diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 37d06355d2d3f..051d65652633d 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -21,7 +21,7 @@ use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray, StructArray}; use arrow_schema::{DataType, Field}; use datafusion::prelude::*; use datafusion_common::{assert_contains, DFSchema, ScalarValue}; -use datafusion_expr::AggregateExt; +use datafusion_expr::ExprFunctionExt; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions_aggregate::first_last::first_value_udaf; use datafusion_functions_aggregate::sum::sum_udaf; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 452c05be34f49..68d5504eea48b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -28,8 +28,8 @@ use crate::expr_fn::binary_expr; use crate::logical_plan::Subquery; use crate::utils::expr_to_columns; use crate::{ - aggregate_function, built_in_window_function, udaf, ExprSchemable, Operator, - Signature, + aggregate_function, built_in_window_function, udaf, BuiltInWindowFunction, + ExprSchemable, Operator, Signature, WindowFrame, WindowUDF, }; use crate::{window_frame, Volatility}; @@ -60,6 +60,10 @@ use sqlparser::ast::NullTreatment; /// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or /// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`]). /// +/// See also [`ExprFunctionExt`] for creating aggregate and window functions. +/// +/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt +/// /// # Schema Access /// /// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability @@ -283,15 +287,17 @@ pub enum Expr { /// This expression is guaranteed to have a fixed type. TryCast(TryCast), /// A sort expression, that can be used to sort values. + /// + /// See [Expr::sort] for more details Sort(Sort), /// Represents the call of a scalar function with a set of arguments. ScalarFunction(ScalarFunction), /// Calls an aggregate function with arguments, and optional /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`. /// - /// See also [`AggregateExt`] to set these fields. + /// See also [`ExprFunctionExt`] to set these fields. /// - /// [`AggregateExt`]: crate::udaf::AggregateExt + /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt AggregateFunction(AggregateFunction), /// Represents the call of a window function with arguments. 
WindowFunction(WindowFunction), @@ -641,9 +647,9 @@ impl AggregateFunctionDefinition { /// Aggregate function /// -/// See also [`AggregateExt`] to set these fields on `Expr` +/// See also [`ExprFunctionExt`] to set these fields on `Expr` /// -/// [`AggregateExt`]: crate::udaf::AggregateExt +/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct AggregateFunction { /// Name of the function @@ -769,7 +775,52 @@ impl fmt::Display for WindowFunctionDefinition { } } +impl From for WindowFunctionDefinition { + fn from(value: aggregate_function::AggregateFunction) -> Self { + Self::AggregateFunction(value) + } +} + +impl From for WindowFunctionDefinition { + fn from(value: BuiltInWindowFunction) -> Self { + Self::BuiltInWindowFunction(value) + } +} + +impl From> for WindowFunctionDefinition { + fn from(value: Arc) -> Self { + Self::AggregateUDF(value) + } +} + +impl From> for WindowFunctionDefinition { + fn from(value: Arc) -> Self { + Self::WindowUDF(value) + } +} + /// Window function +/// +/// Holds the actual actual function to call [`WindowFunction`] as well as its +/// arguments (`args`) and the contents of the `OVER` clause: +/// +/// 1. `PARTITION BY` +/// 2. `ORDER BY` +/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`) +/// +/// # Example +/// ``` +/// # use datafusion_expr::{Expr, BuiltInWindowFunction, col, ExprFunctionExt}; +/// # use datafusion_expr::expr::WindowFunction; +/// // Create FIRST_VALUE(a) OVER (PARTITION BY b ORDER BY c) +/// let expr = Expr::WindowFunction( +/// WindowFunction::new(BuiltInWindowFunction::FirstValue, vec![col("a")]) +/// ) +/// .partition_by(vec![col("b")]) +/// .order_by(vec![col("b").sort(true, true)]) +/// .build() +/// .unwrap(); +/// ``` #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct WindowFunction { /// Name of the function @@ -787,22 +838,16 @@ pub struct WindowFunction { } impl WindowFunction { - /// Create a new Window expression - pub fn new( - fun: WindowFunctionDefinition, - args: Vec, - partition_by: Vec, - order_by: Vec, - window_frame: window_frame::WindowFrame, - null_treatment: Option, - ) -> Self { + /// Create a new Window expression with the specified argument an + /// empty `OVER` clause + pub fn new(fun: impl Into, args: Vec) -> Self { Self { - fun, + fun: fun.into(), args, - partition_by, - order_by, - window_frame, - null_treatment, + partition_by: Vec::default(), + order_by: Vec::default(), + window_frame: WindowFrame::new(None), + null_treatment: None, } } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 9187e83522052..1f51cded22399 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, + Placeholder, TryCast, Unnest, WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -30,12 +30,15 @@ use crate::{ AggregateUDF, Expr, LogicalPlan, Operator, ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; -use crate::{AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowUDF, WindowUDFImpl}; +use crate::{ + AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowFrame, WindowUDF, WindowUDFImpl, +}; use arrow::compute::kernels::cast_utils::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, }; use arrow::datatypes::{DataType, Field}; -use 
datafusion_common::{Column, Result, ScalarValue}; +use datafusion_common::{plan_err, Column, Result, ScalarValue}; +use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; use std::ops::Not; @@ -664,6 +667,276 @@ pub fn interval_month_day_nano_lit(value: &str) -> Expr { Expr::Literal(ScalarValue::IntervalMonthDayNano(interval)) } +/// Extensions for configuring [`Expr::AggregateFunction`] or [`Expr::WindowFunction`] +/// +/// Adds methods to [`Expr`] that make it easy to set optional options +/// such as `ORDER BY`, `FILTER` and `DISTINCT` +/// +/// # Example +/// ```no_run +/// # use datafusion_common::Result; +/// # use datafusion_expr::test::function_stub::count; +/// # use sqlparser::ast::NullTreatment; +/// # use datafusion_expr::{ExprFunctionExt, lit, Expr, col}; +/// # use datafusion_expr::window_function::percent_rank; +/// # // first_value is an aggregate function in another crate +/// # fn first_value(_arg: Expr) -> Expr { +/// unimplemented!() } +/// # fn main() -> Result<()> { +/// // Create an aggregate count, filtering on column y > 5 +/// let agg = count(col("x")).filter(col("y").gt(lit(5))).build()?; +/// +/// // Find the first value in an aggregate sorted by column y +/// // equivalent to: +/// // `FIRST_VALUE(x ORDER BY y ASC IGNORE NULLS)` +/// let sort_expr = col("y").sort(true, true); +/// let agg = first_value(col("x")) +/// .order_by(vec![sort_expr]) +/// .null_treatment(NullTreatment::IgnoreNulls) +/// .build()?; +/// +/// // Create a window expression for percent rank partitioned on column a +/// // equivalent to: +/// // `PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS LAST IGNORE NULLS)` +/// let window = percent_rank() +/// .partition_by(vec![col("a")]) +/// .order_by(vec![col("b").sort(true, true)]) +/// .null_treatment(NullTreatment::IgnoreNulls) +/// .build()?; +/// # Ok(()) +/// # } +/// ``` +pub trait ExprFunctionExt { + /// Add `ORDER BY ` + /// + /// Note: `order_by` must be [`Expr::Sort`] + fn order_by(self, order_by: Vec) -> ExprFuncBuilder; + /// Add `FILTER ` + fn filter(self, filter: Expr) -> ExprFuncBuilder; + /// Add `DISTINCT` + fn distinct(self) -> ExprFuncBuilder; + /// Add `RESPECT NULLS` or `IGNORE NULLS` + fn null_treatment( + self, + null_treatment: impl Into>, + ) -> ExprFuncBuilder; + /// Add `PARTITION BY` + fn partition_by(self, partition_by: Vec) -> ExprFuncBuilder; + /// Add appropriate window frame conditions + fn window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder; +} + +#[derive(Debug, Clone)] +pub enum ExprFuncKind { + Aggregate(AggregateFunction), + Window(WindowFunction), +} + +/// Implementation of [`ExprFunctionExt`]. 
+/// +/// See [`ExprFunctionExt`] for usage and examples +#[derive(Debug, Clone)] +pub struct ExprFuncBuilder { + fun: Option, + order_by: Option>, + filter: Option, + distinct: bool, + null_treatment: Option, + partition_by: Option>, + window_frame: Option, +} + +impl ExprFuncBuilder { + /// Create a new `ExprFuncBuilder`, see [`ExprFunctionExt`] + fn new(fun: Option) -> Self { + Self { + fun, + order_by: None, + filter: None, + distinct: false, + null_treatment: None, + partition_by: None, + window_frame: None, + } + } + + /// Updates and returns the in progress [`Expr::AggregateFunction`] or [`Expr::WindowFunction`] + /// + /// # Errors: + /// + /// Returns an error if this builder [`ExprFunctionExt`] was used with an + /// `Expr` variant other than [`Expr::AggregateFunction`] or [`Expr::WindowFunction`] + pub fn build(self) -> Result { + let Self { + fun, + order_by, + filter, + distinct, + null_treatment, + partition_by, + window_frame, + } = self; + + let Some(fun) = fun else { + return plan_err!( + "ExprFunctionExt can only be used with Expr::AggregateFunction or Expr::WindowFunction" + ); + }; + + if let Some(order_by) = &order_by { + for expr in order_by.iter() { + if !matches!(expr, Expr::Sort(_)) { + return plan_err!( + "ORDER BY expressions must be Expr::Sort, found {expr:?}" + ); + } + } + } + + let fun_expr = match fun { + ExprFuncKind::Aggregate(mut udaf) => { + udaf.order_by = order_by; + udaf.filter = filter.map(Box::new); + udaf.distinct = distinct; + udaf.null_treatment = null_treatment; + Expr::AggregateFunction(udaf) + } + ExprFuncKind::Window(mut udwf) => { + let has_order_by = order_by.as_ref().map(|o| !o.is_empty()); + udwf.order_by = order_by.unwrap_or_default(); + udwf.partition_by = partition_by.unwrap_or_default(); + udwf.window_frame = + window_frame.unwrap_or(WindowFrame::new(has_order_by)); + udwf.null_treatment = null_treatment; + Expr::WindowFunction(udwf) + } + }; + + Ok(fun_expr) + } +} + +impl ExprFunctionExt for ExprFuncBuilder { + /// Add `ORDER BY ` + /// + /// Note: `order_by` must be [`Expr::Sort`] + fn order_by(mut self, order_by: Vec) -> ExprFuncBuilder { + self.order_by = Some(order_by); + self + } + + /// Add `FILTER ` + fn filter(mut self, filter: Expr) -> ExprFuncBuilder { + self.filter = Some(filter); + self + } + + /// Add `DISTINCT` + fn distinct(mut self) -> ExprFuncBuilder { + self.distinct = true; + self + } + + /// Add `RESPECT NULLS` or `IGNORE NULLS` + fn null_treatment( + mut self, + null_treatment: impl Into>, + ) -> ExprFuncBuilder { + self.null_treatment = null_treatment.into(); + self + } + + fn partition_by(mut self, partition_by: Vec) -> ExprFuncBuilder { + self.partition_by = Some(partition_by); + self + } + + fn window_frame(mut self, window_frame: WindowFrame) -> ExprFuncBuilder { + self.window_frame = Some(window_frame); + self + } +} + +impl ExprFunctionExt for Expr { + fn order_by(self, order_by: Vec) -> ExprFuncBuilder { + let mut builder = match self { + Expr::AggregateFunction(udaf) => { + ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf))) + } + Expr::WindowFunction(udwf) => { + ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf))) + } + _ => ExprFuncBuilder::new(None), + }; + if builder.fun.is_some() { + builder.order_by = Some(order_by); + } + builder + } + fn filter(self, filter: Expr) -> ExprFuncBuilder { + match self { + Expr::AggregateFunction(udaf) => { + let mut builder = + ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf))); + builder.filter = Some(filter); + builder + } + _ => 
ExprFuncBuilder::new(None), + } + } + fn distinct(self) -> ExprFuncBuilder { + match self { + Expr::AggregateFunction(udaf) => { + let mut builder = + ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf))); + builder.distinct = true; + builder + } + _ => ExprFuncBuilder::new(None), + } + } + fn null_treatment( + self, + null_treatment: impl Into>, + ) -> ExprFuncBuilder { + let mut builder = match self { + Expr::AggregateFunction(udaf) => { + ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf))) + } + Expr::WindowFunction(udwf) => { + ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf))) + } + _ => ExprFuncBuilder::new(None), + }; + if builder.fun.is_some() { + builder.null_treatment = null_treatment.into(); + } + builder + } + + fn partition_by(self, partition_by: Vec) -> ExprFuncBuilder { + match self { + Expr::WindowFunction(udwf) => { + let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf))); + builder.partition_by = Some(partition_by); + builder + } + _ => ExprFuncBuilder::new(None), + } + } + + fn window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder { + match self { + Expr::WindowFunction(udwf) => { + let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf))); + builder.window_frame = Some(window_frame); + builder + } + _ => ExprFuncBuilder::new(None), + } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index e1943c890e7c3..0a5cf4653a228 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -60,6 +60,7 @@ pub mod type_coercion; pub mod utils; pub mod var_provider; pub mod window_frame; +pub mod window_function; pub mod window_state; pub use accumulator::Accumulator; @@ -86,7 +87,7 @@ pub use signature::{ }; pub use sqlparser; pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; -pub use udaf::{AggregateExt, AggregateUDF, AggregateUDFImpl, ReversedUDAF}; +pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF}; pub use udf::{ScalarUDF, ScalarUDFImpl}; pub use udwf::{WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index f1df8609f903c..a97b9f010f792 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -22,7 +22,7 @@ use crate::expr::{ Cast, GroupingSet, InList, InSubquery, Like, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction, }; -use crate::Expr; +use crate::{Expr, ExprFunctionExt}; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, @@ -294,14 +294,13 @@ impl TreeNode for Expr { transform_vec(order_by, &mut f) )? 
.update_data(|(new_args, new_partition_by, new_order_by)| { - Expr::WindowFunction(WindowFunction::new( - fun, - new_args, - new_partition_by, - new_order_by, - window_frame, - null_treatment, - )) + Expr::WindowFunction(WindowFunction::new(fun, new_args)) + .partition_by(new_partition_by) + .order_by(new_order_by) + .window_frame(window_frame) + .null_treatment(null_treatment) + .build() + .unwrap() }), Expr::AggregateFunction(AggregateFunction { args, diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 2851ca811e0c0..8867a478f790a 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -24,9 +24,8 @@ use std::sync::Arc; use std::vec; use arrow::datatypes::{DataType, Field}; -use sqlparser::ast::NullTreatment; -use datafusion_common::{exec_err, not_impl_err, plan_err, Result}; +use datafusion_common::{exec_err, not_impl_err, Result}; use crate::expr::AggregateFunction; use crate::function::{ @@ -655,177 +654,3 @@ impl AggregateUDFImpl for AggregateUDFLegacyWrapper { (self.accumulator)(acc_args) } } - -/// Extensions for configuring [`Expr::AggregateFunction`] -/// -/// Adds methods to [`Expr`] that make it easy to set optional aggregate options -/// such as `ORDER BY`, `FILTER` and `DISTINCT` -/// -/// # Example -/// ```no_run -/// # use datafusion_common::Result; -/// # use datafusion_expr::{AggregateUDF, col, Expr, lit}; -/// # use sqlparser::ast::NullTreatment; -/// # fn count(arg: Expr) -> Expr { todo!{} } -/// # fn first_value(arg: Expr) -> Expr { todo!{} } -/// # fn main() -> Result<()> { -/// use datafusion_expr::AggregateExt; -/// -/// // Create COUNT(x FILTER y > 5) -/// let agg = count(col("x")) -/// .filter(col("y").gt(lit(5))) -/// .build()?; -/// // Create FIRST_VALUE(x ORDER BY y IGNORE NULLS) -/// let sort_expr = col("y").sort(true, true); -/// let agg = first_value(col("x")) -/// .order_by(vec![sort_expr]) -/// .null_treatment(NullTreatment::IgnoreNulls) -/// .build()?; -/// # Ok(()) -/// # } -/// ``` -pub trait AggregateExt { - /// Add `ORDER BY ` - /// - /// Note: `order_by` must be [`Expr::Sort`] - fn order_by(self, order_by: Vec) -> AggregateBuilder; - /// Add `FILTER ` - fn filter(self, filter: Expr) -> AggregateBuilder; - /// Add `DISTINCT` - fn distinct(self) -> AggregateBuilder; - /// Add `RESPECT NULLS` or `IGNORE NULLS` - fn null_treatment(self, null_treatment: NullTreatment) -> AggregateBuilder; -} - -/// Implementation of [`AggregateExt`]. 
-/// -/// See [`AggregateExt`] for usage and examples -#[derive(Debug, Clone)] -pub struct AggregateBuilder { - udaf: Option, - order_by: Option>, - filter: Option, - distinct: bool, - null_treatment: Option, -} - -impl AggregateBuilder { - /// Create a new `AggregateBuilder`, see [`AggregateExt`] - - fn new(udaf: Option) -> Self { - Self { - udaf, - order_by: None, - filter: None, - distinct: false, - null_treatment: None, - } - } - - /// Updates and returns the in progress [`Expr::AggregateFunction`] - /// - /// # Errors: - /// - /// Returns an error of this builder [`AggregateExt`] was used with an - /// `Expr` variant other than [`Expr::AggregateFunction`] - pub fn build(self) -> Result { - let Self { - udaf, - order_by, - filter, - distinct, - null_treatment, - } = self; - - let Some(mut udaf) = udaf else { - return plan_err!( - "AggregateExt can only be used with Expr::AggregateFunction" - ); - }; - - if let Some(order_by) = &order_by { - for expr in order_by.iter() { - if !matches!(expr, Expr::Sort(_)) { - return plan_err!( - "ORDER BY expressions must be Expr::Sort, found {expr:?}" - ); - } - } - } - - udaf.order_by = order_by; - udaf.filter = filter.map(Box::new); - udaf.distinct = distinct; - udaf.null_treatment = null_treatment; - Ok(Expr::AggregateFunction(udaf)) - } - - /// Add `ORDER BY ` - /// - /// Note: `order_by` must be [`Expr::Sort`] - pub fn order_by(mut self, order_by: Vec) -> AggregateBuilder { - self.order_by = Some(order_by); - self - } - - /// Add `FILTER ` - pub fn filter(mut self, filter: Expr) -> AggregateBuilder { - self.filter = Some(filter); - self - } - - /// Add `DISTINCT` - pub fn distinct(mut self) -> AggregateBuilder { - self.distinct = true; - self - } - - /// Add `RESPECT NULLS` or `IGNORE NULLS` - pub fn null_treatment(mut self, null_treatment: NullTreatment) -> AggregateBuilder { - self.null_treatment = Some(null_treatment); - self - } -} - -impl AggregateExt for Expr { - fn order_by(self, order_by: Vec) -> AggregateBuilder { - match self { - Expr::AggregateFunction(udaf) => { - let mut builder = AggregateBuilder::new(Some(udaf)); - builder.order_by = Some(order_by); - builder - } - _ => AggregateBuilder::new(None), - } - } - fn filter(self, filter: Expr) -> AggregateBuilder { - match self { - Expr::AggregateFunction(udaf) => { - let mut builder = AggregateBuilder::new(Some(udaf)); - builder.filter = Some(filter); - builder - } - _ => AggregateBuilder::new(None), - } - } - fn distinct(self) -> AggregateBuilder { - match self { - Expr::AggregateFunction(udaf) => { - let mut builder = AggregateBuilder::new(Some(udaf)); - builder.distinct = true; - builder - } - _ => AggregateBuilder::new(None), - } - } - fn null_treatment(self, null_treatment: NullTreatment) -> AggregateBuilder { - match self { - Expr::AggregateFunction(udaf) => { - let mut builder = AggregateBuilder::new(Some(udaf)); - builder.null_treatment = Some(null_treatment); - builder - } - _ => AggregateBuilder::new(None), - } - } -} diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 1a6b21e3dd294..5abce013dfb6f 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -28,9 +28,10 @@ use arrow::datatypes::DataType; use datafusion_common::Result; +use crate::expr::WindowFunction; use crate::{ function::WindowFunctionSimplification, Expr, PartitionEvaluator, - PartitionEvaluatorFactory, ReturnTypeFunction, Signature, WindowFrame, + PartitionEvaluatorFactory, ReturnTypeFunction, Signature, }; /// Logical representation of a user-defined window 
function (UDWF) @@ -123,28 +124,19 @@ impl WindowUDF { Self::new_from_impl(AliasedWindowUDFImpl::new(Arc::clone(&self.inner), aliases)) } - /// creates a [`Expr`] that calls the window function given - /// the `partition_by`, `order_by`, and `window_frame` definition + /// creates a [`Expr`] that calls the window function with default + /// values for `order_by`, `partition_by`, `window_frame`. /// - /// This utility allows using the UDWF without requiring access to - /// the registry, such as with the DataFrame API. - pub fn call( - &self, - args: Vec, - partition_by: Vec, - order_by: Vec, - window_frame: WindowFrame, - ) -> Expr { + /// See [`ExprFunctionExt`] for details on setting these values. + /// + /// This utility allows using a user defined window function without + /// requiring access to the registry, such as with the DataFrame API. + /// + /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt + pub fn call(&self, args: Vec) -> Expr { let fun = crate::WindowFunctionDefinition::WindowUDF(Arc::new(self.clone())); - Expr::WindowFunction(crate::expr::WindowFunction { - fun, - args, - partition_by, - order_by, - window_frame, - null_treatment: None, - }) + Expr::WindowFunction(WindowFunction::new(fun, args)) } /// Returns this function's name @@ -210,7 +202,7 @@ where /// # use std::any::Any; /// # use arrow::datatypes::DataType; /// # use datafusion_common::{DataFusionError, plan_err, Result}; -/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame}; +/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; /// #[derive(Debug, Clone)] /// struct SmoothIt { @@ -244,12 +236,13 @@ where /// let smooth_it = WindowUDF::from(SmoothIt::new()); /// /// // Call the function `add_one(col)` -/// let expr = smooth_it.call( -/// vec![col("speed")], // smooth_it(speed) -/// vec![col("car")], // PARTITION BY car -/// vec![col("time").sort(true, true)], // ORDER BY time ASC -/// WindowFrame::new(None), -/// ); +/// // smooth_it(speed) OVER (PARTITION BY car ORDER BY time ASC) +/// let expr = smooth_it.call(vec![col("speed")]) +/// .partition_by(vec![col("car")]) +/// .order_by(vec![col("time").sort(true, true)]) +/// .window_frame(WindowFrame::new(None)) +/// .build() +/// .unwrap(); /// ``` pub trait WindowUDFImpl: Debug + Send + Sync { /// Returns this object as an [`Any`] trait object diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 889aa0952e51e..2ef1597abfd1d 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1253,8 +1253,8 @@ mod tests { use super::*; use crate::{ col, cube, expr, expr_vec_fmt, grouping_set, lit, rollup, - test::function_stub::sum_udaf, AggregateFunction, Cast, WindowFrame, - WindowFunctionDefinition, + test::function_stub::sum_udaf, AggregateFunction, Cast, ExprFunctionExt, + WindowFrame, WindowFunctionDefinition, }; #[test] @@ -1270,34 +1270,18 @@ mod tests { let max1 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("name")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let max2 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("name")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( 
WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), vec![col("name")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let sum4 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(sum_udaf()), vec![col("age")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let exprs = &[max1.clone(), max2.clone(), min3.clone(), sum4.clone()]; let result = group_window_expr_by_sort_keys(exprs.to_vec())?; @@ -1317,35 +1301,32 @@ mod tests { let max1 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("name")], - vec![], - vec![age_asc.clone(), name_desc.clone()], - WindowFrame::new(Some(false)), - None, - )); + )) + .order_by(vec![age_asc.clone(), name_desc.clone()]) + .build() + .unwrap(); let max2 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("name")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), vec![col("name")], - vec![], - vec![age_asc.clone(), name_desc.clone()], - WindowFrame::new(Some(false)), - None, - )); + )) + .order_by(vec![age_asc.clone(), name_desc.clone()]) + .build() + .unwrap(); let sum4 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(sum_udaf()), vec![col("age")], - vec![], - vec![name_desc.clone(), age_asc.clone(), created_at_desc.clone()], - WindowFrame::new(Some(false)), - None, - )); + )) + .order_by(vec![ + name_desc.clone(), + age_asc.clone(), + created_at_desc.clone(), + ]) + .build() + .unwrap(); // FIXME use as_ref let exprs = &[max1.clone(), max2.clone(), min3.clone(), sum4.clone()]; let result = group_window_expr_by_sort_keys(exprs.to_vec())?; @@ -1373,26 +1354,26 @@ mod tests { Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("name")], - vec![], - vec![ - Expr::Sort(expr::Sort::new(Box::new(col("age")), true, true)), - Expr::Sort(expr::Sort::new(Box::new(col("name")), false, true)), - ], - WindowFrame::new(Some(false)), - None, - )), + )) + .order_by(vec![ + Expr::Sort(expr::Sort::new(Box::new(col("age")), true, true)), + Expr::Sort(expr::Sort::new(Box::new(col("name")), false, true)), + ]) + .window_frame(WindowFrame::new(Some(false))) + .build() + .unwrap(), Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(sum_udaf()), vec![col("age")], - vec![], - vec![ - Expr::Sort(expr::Sort::new(Box::new(col("name")), false, true)), - Expr::Sort(expr::Sort::new(Box::new(col("age")), true, true)), - Expr::Sort(expr::Sort::new(Box::new(col("created_at")), false, true)), - ], - WindowFrame::new(Some(false)), - None, - )), + )) + .order_by(vec![ + Expr::Sort(expr::Sort::new(Box::new(col("name")), false, true)), + Expr::Sort(expr::Sort::new(Box::new(col("age")), true, true)), + Expr::Sort(expr::Sort::new(Box::new(col("created_at")), false, true)), + ]) + .window_frame(WindowFrame::new(Some(false))) + .build() + .unwrap(), ]; let expected = vec![ Expr::Sort(expr::Sort::new(Box::new(col("age")), true, true)), diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs new file mode 100644 index 0000000000000..5e81464d39c25 --- /dev/null +++ b/datafusion/expr/src/window_function.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software 
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::ScalarValue; + +use crate::{expr::WindowFunction, BuiltInWindowFunction, Expr, Literal}; + +/// Create an expression to represent the `row_number` window function +pub fn row_number() -> Expr { + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::RowNumber, + vec![], + )) +} + +/// Create an expression to represent the `rank` window function +pub fn rank() -> Expr { + Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Rank, vec![])) +} + +/// Create an expression to represent the `dense_rank` window function +pub fn dense_rank() -> Expr { + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::DenseRank, + vec![], + )) +} + +/// Create an expression to represent the `percent_rank` window function +pub fn percent_rank() -> Expr { + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::PercentRank, + vec![], + )) +} + +/// Create an expression to represent the `cume_dist` window function +pub fn cume_dist() -> Expr { + Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::CumeDist, vec![])) +} + +/// Create an expression to represent the `ntile` window function +pub fn ntile(arg: Expr) -> Expr { + Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Ntile, vec![arg])) +} + +/// Create an expression to represent the `lag` window function +pub fn lag( + arg: Expr, + shift_offset: Option, + default_value: Option, +) -> Expr { + let shift_offset_lit = shift_offset + .map(|v| v.lit()) + .unwrap_or(ScalarValue::Null.lit()); + let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::Lag, + vec![arg, shift_offset_lit, default_lit], + )) +} + +/// Create an expression to represent the `lead` window function +pub fn lead( + arg: Expr, + shift_offset: Option, + default_value: Option, +) -> Expr { + let shift_offset_lit = shift_offset + .map(|v| v.lit()) + .unwrap_or(ScalarValue::Null.lit()); + let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::Lead, + vec![arg, shift_offset_lit, default_lit], + )) +} + +/// Create an expression to represent the `nth_value` window function +pub fn nth_value(arg: Expr, n: i64) -> Expr { + Expr::WindowFunction(WindowFunction::new( + BuiltInWindowFunction::NthValue, + vec![arg, n.lit()], + )) +} diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index ba11f7e91e070..8969937d377c4 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -31,8 +31,8 @@ use datafusion_common::{ use datafusion_expr::function::{AccumulatorArgs, 
StateFieldsArgs}; use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity}; use datafusion_expr::{ - Accumulator, AggregateExt, AggregateUDFImpl, ArrayFunctionSignature, Expr, Signature, - TypeSignature, Volatility, + Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Expr, ExprFunctionExt, + Signature, TypeSignature, Volatility, }; use datafusion_physical_expr_common::aggregate::utils::get_sort_options; use datafusion_physical_expr_common::sort_expr::{ diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index fa8aeb86ed31e..338268e299da7 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -101,6 +101,7 @@ mod tests { use arrow::datatypes::DataType; use datafusion_common::ScalarValue; use datafusion_expr::expr::Sort; + use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ col, exists, expr, in_subquery, logical_plan::LogicalPlanBuilder, max, out_ref_col, scalar_subquery, wildcard, WindowFrame, WindowFrameBound, @@ -223,15 +224,14 @@ mod tests { .window(vec![Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(count_udaf()), vec![wildcard()], - vec![], - vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))], - WindowFrame::new_bounds( - WindowFrameUnits::Range, - WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), - WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), - ), - None, - ))])? + )) + .order_by(vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))]) + .window_frame(WindowFrame::new_bounds( + WindowFrameUnits::Range, + WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), + WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), + )) + .build()?])? .project(vec![count(wildcard())])? 
.build()?; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 50fb1b8193ceb..75dbb4d1adcd3 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -47,8 +47,9 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - type_coercion, AggregateFunction, AggregateUDF, Expr, ExprSchemable, LogicalPlan, - Operator, ScalarUDF, Signature, WindowFrame, WindowFrameBound, WindowFrameUnits, + type_coercion, AggregateFunction, AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, + LogicalPlan, Operator, ScalarUDF, Signature, WindowFrame, WindowFrameBound, + WindowFrameUnits, }; use crate::analyzer::AnalyzerRule; @@ -466,14 +467,14 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { _ => args, }; - Ok(Transformed::yes(Expr::WindowFunction(WindowFunction::new( - fun, - args, - partition_by, - order_by, - window_frame, - null_treatment, - )))) + Ok(Transformed::yes( + Expr::WindowFunction(WindowFunction::new(fun, args)) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .null_treatment(null_treatment) + .build()?, + )) } Expr::Alias(_) | Expr::Column(_) diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 58c1ae297b02e..16abf93f38073 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -806,7 +806,7 @@ mod tests { use datafusion_common::{ Column, DFSchema, DFSchemaRef, JoinType, Result, TableReference, }; - use datafusion_expr::AggregateExt; + use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ binary_expr, build_join_schema, builder::table_scan_with_filters, @@ -815,7 +815,7 @@ mod tests { lit, logical_plan::{builder::LogicalPlanBuilder, table_scan}, max, min, not, try_cast, when, AggregateFunction, BinaryExpr, Expr, Extension, - Like, LogicalPlan, Operator, Projection, UserDefinedLogicalNodeCore, WindowFrame, + Like, LogicalPlan, Operator, Projection, UserDefinedLogicalNodeCore, WindowFunctionDefinition, }; @@ -1919,19 +1919,14 @@ mod tests { let max1 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("test.a")], - vec![col("test.b")], - vec![], - WindowFrame::new(None), - None, - )); + )) + .partition_by(vec![col("test.b")]) + .build() + .unwrap(); let max2 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("test.b")], - vec![], - vec![], - WindowFrame::new(None), - None, )); let col1 = col(max1.display_name()?); let col2 = col(max2.display_name()?); diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index fcd33be618f7c..430517121f2ac 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -23,7 +23,7 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Column, Result}; use datafusion_expr::expr_rewriter::normalize_cols; use datafusion_expr::utils::expand_wildcard; -use datafusion_expr::{col, AggregateExt, LogicalPlanBuilder}; +use datafusion_expr::{col, ExprFunctionExt, 
LogicalPlanBuilder}; use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan}; /// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]] diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 56556f387d1ba..38dfbb3ed5514 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3855,15 +3855,9 @@ mod tests { let udwf = WindowFunctionDefinition::WindowUDF( WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(), ); - let window_function_expr = - Expr::WindowFunction(datafusion_expr::expr::WindowFunction::new( - udwf, - vec![], - vec![], - vec![], - WindowFrame::new(None), - None, - )); + let window_function_expr = Expr::WindowFunction( + datafusion_expr::expr::WindowFunction::new(udwf, vec![]), + ); let expected = col("result_column"); assert_eq!(simplify(window_function_expr), expected); @@ -3871,15 +3865,9 @@ mod tests { let udwf = WindowFunctionDefinition::WindowUDF( WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(), ); - let window_function_expr = - Expr::WindowFunction(datafusion_expr::expr::WindowFunction::new( - udwf, - vec![], - vec![], - vec![], - WindowFrame::new(None), - None, - )); + let window_function_expr = Expr::WindowFunction( + datafusion_expr::expr::WindowFunction::new(udwf, vec![]), + ); let expected = window_function_expr.clone(); assert_eq!(simplify(window_function_expr), expected); diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index f2b4abdd6cbd5..d776e6598cbe7 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -354,7 +354,7 @@ mod tests { use super::*; use crate::test::*; use datafusion_expr::expr::{self, GroupingSet}; - use datafusion_expr::AggregateExt; + use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ lit, logical_plan::builder::LogicalPlanBuilder, max, min, AggregateFunction, }; diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index aea8e454a31c4..7b717add3311a 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -25,6 +25,7 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; +use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ expr::{self, InList, Sort, WindowFunction}, logical_plan::{PlanType, StringifiedPlan}, @@ -299,7 +300,6 @@ pub fn parse_expr( ) })?; // TODO: support proto for null treatment - let null_treatment = None; regularize_window_order_by(&window_frame, &mut order_by)?; match window_function { @@ -314,11 +314,12 @@ pub fn parse_expr( "expr", codec, )?], - partition_by, - order_by, - window_frame, - None, - ))) + )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .build() + .unwrap()) } window_expr_node::WindowFunction::BuiltInFunction(i) => { let built_in_function = protobuf::BuiltInWindowFunction::try_from(*i) @@ -335,11 +336,12 @@ pub fn parse_expr( built_in_function, ), args, - partition_by, - order_by, - window_frame, - null_treatment, - ))) + )) + .partition_by(partition_by) + .order_by(order_by) + 
.window_frame(window_frame) + .build() + .unwrap()) } window_expr_node::WindowFunction::Udaf(udaf_name) => { let udaf_function = match &expr.fun_definition { @@ -354,11 +356,12 @@ pub fn parse_expr( Ok(Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::AggregateUDF(udaf_function), args, - partition_by, - order_by, - window_frame, - None, - ))) + )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .build() + .unwrap()) } window_expr_node::WindowFunction::Udwf(udwf_name) => { let udwf_function = match &expr.fun_definition { @@ -373,11 +376,12 @@ pub fn parse_expr( Ok(Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::WindowUDF(udwf_function), args, - partition_by, - order_by, - window_frame, - None, - ))) + )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .build() + .unwrap()) } } } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 25223c3731bef..7a4de4f61a380 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -60,7 +60,7 @@ use datafusion_expr::expr::{ }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ - Accumulator, AggregateExt, AggregateFunction, AggregateUDF, ColumnarValue, + Accumulator, AggregateFunction, AggregateUDF, ColumnarValue, ExprFunctionExt, ExprSchemable, Literal, LogicalPlan, Operator, PartitionEvaluator, ScalarUDF, Signature, TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF, WindowUDFImpl, @@ -2073,11 +2073,12 @@ fn roundtrip_window() { datafusion_expr::BuiltInWindowFunction::Rank, ), vec![], - vec![col("col1")], - vec![col("col2")], - WindowFrame::new(Some(false)), - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, false)]) + .window_frame(WindowFrame::new(Some(false))) + .build() + .unwrap(); // 2. with default window_frame let test_expr2 = Expr::WindowFunction(expr::WindowFunction::new( @@ -2085,11 +2086,12 @@ fn roundtrip_window() { datafusion_expr::BuiltInWindowFunction::Rank, ), vec![], - vec![col("col1")], - vec![col("col2")], - WindowFrame::new(Some(false)), - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(false, true)]) + .window_frame(WindowFrame::new(Some(false))) + .build() + .unwrap(); // 3. with window_frame with row numbers let range_number_frame = WindowFrame::new_bounds( @@ -2103,11 +2105,12 @@ fn roundtrip_window() { datafusion_expr::BuiltInWindowFunction::Rank, ), vec![], - vec![col("col1")], - vec![col("col2")], - range_number_frame, - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(false, false)]) + .window_frame(range_number_frame) + .build() + .unwrap(); // 4. test with AggregateFunction let row_number_frame = WindowFrame::new_bounds( @@ -2119,11 +2122,12 @@ fn roundtrip_window() { let test_expr4 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), vec![col("col1")], - vec![col("col1")], - vec![col("col2")], - row_number_frame.clone(), - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, true)]) + .window_frame(row_number_frame.clone()) + .build() + .unwrap(); // 5. 
test with AggregateUDF #[derive(Debug)] @@ -2168,11 +2172,12 @@ fn roundtrip_window() { let test_expr5 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(Arc::new(dummy_agg.clone())), vec![col("col1")], - vec![col("col1")], - vec![col("col2")], - row_number_frame.clone(), - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, true)]) + .window_frame(row_number_frame.clone()) + .build() + .unwrap(); ctx.register_udaf(dummy_agg); // 6. test with WindowUDF @@ -2244,20 +2249,20 @@ fn roundtrip_window() { let test_expr6 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::WindowUDF(Arc::new(dummy_window_udf.clone())), vec![col("col1")], - vec![col("col1")], - vec![col("col2")], - row_number_frame.clone(), - None, - )); + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, true)]) + .window_frame(row_number_frame.clone()) + .build() + .unwrap(); let text_expr7 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(avg_udaf()), vec![col("col1")], - vec![], - vec![], - row_number_frame.clone(), - None, - )); + )) + .window_frame(row_number_frame.clone()) + .build() + .unwrap(); ctx.register_udwf(dummy_window_udf); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 0c4b125e76d0a..fd759c1613814 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -24,7 +24,8 @@ use datafusion_common::{ use datafusion_expr::planner::PlannerResult; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - expr, AggregateFunction, Expr, ExprSchemable, WindowFrame, WindowFunctionDefinition, + expr, AggregateFunction, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, + WindowFunctionDefinition, }; use datafusion_expr::{ expr::{ScalarFunction, Unnest}, @@ -329,20 +330,24 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(aggregate_fun), args, - partition_by, - order_by, - window_frame, - null_treatment, )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .null_treatment(null_treatment) + .build() + .unwrap() } _ => Expr::WindowFunction(expr::WindowFunction::new( fun, self.function_args_to_expr(args, schema, planner_context)?, - partition_by, - order_by, - window_frame, - null_treatment, - )), + )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .null_treatment(null_treatment) + .build() + .unwrap(), }; return Ok(expr); } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index f4ea44f37d788..3f7a85da276be 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1507,7 +1507,7 @@ mod tests { table_scan, try_cast, when, wildcard, ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; - use datafusion_expr::{interval_month_day_nano_lit, AggregateExt}; + use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 6e693a0e70879..60036e440ffb7 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -308,7 
+308,7 @@ select log(-1), log(0), sqrt(-1); ## Aggregate Function Builder -You can also use the `AggregateExt` trait to more easily build Aggregate arguments `Expr`. +You can also use the `ExprFunctionExt` trait to more easily build Aggregate arguments `Expr`. See `datafusion-examples/examples/expr_api.rs` for example usage. From c9518245fa8138b19402ac7f124d9091adad2426 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:42:47 -0700 Subject: [PATCH 142/357] Parsing SQL strings to Exprs with the qualified schema (#11562) * Parsing SQL strings to Exprs wtih the qualified schema * refactor code --- .../core/tests/expr_api/parse_sql_expr.rs | 16 ++++- .../optimizer/tests/optimizer_integration.rs | 2 +- datafusion/sql/src/expr/identifier.rs | 60 +++++++++---------- datafusion/sql/tests/sql_integration.rs | 2 +- .../sqllogictest/test_files/group_by.slt | 2 +- 5 files changed, 46 insertions(+), 36 deletions(-) diff --git a/datafusion/core/tests/expr_api/parse_sql_expr.rs b/datafusion/core/tests/expr_api/parse_sql_expr.rs index 991579b5a350e..a3defceee247c 100644 --- a/datafusion/core/tests/expr_api/parse_sql_expr.rs +++ b/datafusion/core/tests/expr_api/parse_sql_expr.rs @@ -17,10 +17,12 @@ use arrow_schema::{DataType, Field, Schema}; use datafusion::prelude::{CsvReadOptions, SessionContext}; +use datafusion_common::DFSchema; use datafusion_common::{DFSchemaRef, Result, ToDFSchema}; +use datafusion_expr::col; +use datafusion_expr::lit; use datafusion_expr::Expr; use datafusion_sql::unparser::Unparser; - /// A schema like: /// /// a: Int32 (possibly with nulls) @@ -85,6 +87,18 @@ async fn round_trip_dataframe(sql: &str) -> Result<()> { Ok(()) } +#[tokio::test] +async fn roundtrip_qualified_schema() -> Result<()> { + let sql = "a < 5 OR a = 8"; + let expr = col("t.a").lt(lit(5_i64)).or(col("t.a").eq(lit(8_i64))); + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let df_schema = DFSchema::try_from_qualified_schema("t", &schema).unwrap(); + let ctx = SessionContext::new(); + let parsed_expr = ctx.parse_sql_expr(sql, &df_schema)?; + assert_eq!(parsed_expr, expr); + Ok(()) +} + fn unparse_sql_expr(expr: &Expr) -> Result { let unparser = Unparser::default(); diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index c0863839dba17..3c77ffaa17f6c 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -109,7 +109,7 @@ fn distribute_by() -> Result<()> { // regression test for https://github.com/apache/datafusion/issues/3234 let sql = "SELECT col_int32, col_utf8 FROM test DISTRIBUTE BY (col_utf8)"; let plan = test_sql(sql)?; - let expected = "Repartition: DistributeBy(col_utf8)\ + let expected = "Repartition: DistributeBy(test.col_utf8)\ \n TableScan: test projection=[col_int32, col_utf8]"; assert_eq!(expected, format!("{plan:?}")); Ok(()) diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index f8979bde30867..9b8356701a40b 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -26,6 +26,7 @@ use datafusion_expr::planner::PlannerResult; use datafusion_expr::{Case, Expr}; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use datafusion_expr::UNNAMED_TABLE; impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(super) fn sql_identifier_to_expr( @@ -50,40 +51,35 @@ impl<'a, S: 
ContextProvider> SqlToRel<'a, S> { // compound identifiers, but this is not a compound // identifier. (e.g. it is "foo.bar" not foo.bar) let normalize_ident = self.normalizer.normalize(id); - match schema.field_with_unqualified_name(normalize_ident.as_str()) { - Ok(_) => { - // found a match without a qualified name, this is a inner table column - Ok(Expr::Column(Column { - relation: None, - name: normalize_ident, - })) - } - Err(_) => { - // check the outer_query_schema and try to find a match - if let Some(outer) = planner_context.outer_query_schema() { - match outer.qualified_field_with_unqualified_name( - normalize_ident.as_str(), - ) { - Ok((qualifier, field)) => { - // found an exact match on a qualified name in the outer plan schema, so this is an outer reference column - Ok(Expr::OuterReferenceColumn( - field.data_type().clone(), - Column::from((qualifier, field)), - )) - } - Err(_) => Ok(Expr::Column(Column { - relation: None, - name: normalize_ident, - })), - } - } else { - Ok(Expr::Column(Column { - relation: None, - name: normalize_ident, - })) - } + + // Check for qualified field with unqualified name + if let Ok((qualifier, _)) = + schema.qualified_field_with_unqualified_name(normalize_ident.as_str()) + { + return Ok(Expr::Column(Column { + relation: qualifier.filter(|q| q.table() != UNNAMED_TABLE).cloned(), + name: normalize_ident, + })); + } + + // Check the outer query schema + if let Some(outer) = planner_context.outer_query_schema() { + if let Ok((qualifier, field)) = + outer.qualified_field_with_unqualified_name(normalize_ident.as_str()) + { + // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column + return Ok(Expr::OuterReferenceColumn( + field.data_type().clone(), + Column::from((qualifier, field)), + )); } } + + // Default case + Ok(Expr::Column(Column { + relation: None, + name: normalize_ident, + })) } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 3291560383dfd..511f97c4750e3 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3274,7 +3274,7 @@ fn test_offset_before_limit() { #[test] fn test_distribute_by() { let sql = "select id from person distribute by state"; - let expected = "Repartition: DistributeBy(state)\ + let expected = "Repartition: DistributeBy(person.state)\ \n Projection: person.id\ \n TableScan: person"; quick_test(sql, expected); diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index a3cc10e1eeb8e..b7d466d8bf827 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4077,7 +4077,7 @@ FROM (SELECT c, b, a, SUM(d) as sum1 DISTRIBUTE BY a ---- logical_plan -01)Repartition: DistributeBy(a) +01)Repartition: DistributeBy(multiple_ordered_table_with_pk.a) 02)--Projection: multiple_ordered_table_with_pk.a, multiple_ordered_table_with_pk.b, sum(multiple_ordered_table_with_pk.d) AS sum1 03)----Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.a, multiple_ordered_table_with_pk.b]], aggr=[[sum(CAST(multiple_ordered_table_with_pk.d AS Int64))]] 04)------TableScan: multiple_ordered_table_with_pk projection=[a, b, c, d] From f12b3db9c3a0507ed0bc7984ce0e290be0ca9e2d Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Thu, 25 Jul 2024 03:20:54 -0500 Subject: [PATCH 143/357] fix: expose the fluent API fn for approx_distinct instead of the module 
(#11644) * fix: expose the fluent API fn for approx_distinct instead of the module Fixes: https://github.com/apache/datafusion/issues/11643 * add approx_distinct to roundtrip_expr_api test * lint: cargo fmt --- datafusion/functions-aggregate/src/lib.rs | 2 +- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index b39b1955bb07b..32ca05b8cdd93 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -90,7 +90,7 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { - pub use super::approx_distinct; + pub use super::approx_distinct::approx_distinct; pub use super::approx_median::approx_median; pub use super::approx_percentile_cont::approx_percentile_cont; pub use super::approx_percentile_cont_with_weight::approx_percentile_cont_with_weight; diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 7a4de4f61a380..9c81c48527833 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -67,7 +67,7 @@ use datafusion_expr::{ }; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::expr_fn::{ - array_agg, avg, bit_and, bit_or, bit_xor, bool_and, bool_or, corr, + approx_distinct, array_agg, avg, bit_and, bit_or, bit_xor, bool_and, bool_or, corr, }; use datafusion_functions_aggregate::string_agg::string_agg; use datafusion_proto::bytes::{ @@ -717,6 +717,7 @@ async fn roundtrip_expr_api() -> Result<()> { var_pop(lit(2.2)), stddev(lit(2.2)), stddev_pop(lit(2.2)), + approx_distinct(lit(2)), approx_median(lit(2)), approx_percentile_cont(lit(2), lit(0.5)), approx_percentile_cont_with_weight(lit(2), lit(1), lit(0.5)), From 49d9d45f36989cd448ed6513af65948b6b0100ec Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:53:05 +0800 Subject: [PATCH 144/357] Add some zero column tests covering LIMIT, GROUP BY, WHERE, JOIN, and WINDOW (#11624) * add zero column tests covering LIMIT, GROUP BY, WHERE, JOIN, and WINDOW * change from statement to query to be explicit about no rows * Revert "change from statement to query to be explicit about no rows" This reverts commit fd381fca5e9d80f62062c41c4326e4cbe50b2129. 
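Relating to the `approx_distinct` re-export fix above (#11644): with the function rather than the module exported from `expr_fn`, the fluent form can be called directly. A rough sketch (`user_id` is an illustrative column name):

```rust
use datafusion_expr::{col, Expr};
use datafusion_functions_aggregate::expr_fn::approx_distinct;

// APPROX_DISTINCT(user_id); before the fix this path named the module, which
// forced the doubled `approx_distinct::approx_distinct(..)` spelling.
fn approx_distinct_user_id() -> Expr {
    approx_distinct(col("user_id"))
}
```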
--------- Co-authored-by: Andrew Lamb --- datafusion/sqllogictest/test_files/select.slt | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 6884efc07e159..a5f31cb9b4664 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1225,6 +1225,63 @@ statement ok SELECT * EXCEPT(a, b, c, d) FROM table1 +# try zero column with LIMIT, 1 row but empty +statement ok +SELECT * EXCEPT (a, b, c, d) +FROM table1 +LIMIT 1 + +# try zero column with GROUP BY, 2 row but empty +statement ok +SELECT * EXCEPT (a, b, c, d) +FROM table1 +GROUP BY a + +# try zero column with WHERE, 1 row but empty +statement ok +SELECT * EXCEPT (a, b, c, d) +FROM table1 +WHERE a = 1 + +# create table2 the same with table1 +statement ok +CREATE TABLE table2 ( + a int, + b int, + c int, + d int +) as values + (1, 10, 100, 1000), + (2, 20, 200, 2000); + +# try zero column with inner JOIN, 2 row but empty +statement ok +WITH t1 AS (SELECT a AS t1_a FROM table1), t2 AS (SELECT a AS t2_a FROM table2) +SELECT * EXCEPT (t1_a, t2_a) +FROM t1 +JOIN t2 ON (t1_a = t2_a) + +# try zero column with more JOIN, 2 row but empty +statement ok +SELECT * EXCEPT (b1, b2) +FROM ( + SELECT b AS b1 FROM table1 +) +JOIN ( + SELECT b AS b2 FROM table2 +) ON b1 = b2 + +# try zero column with Window, 2 row but empty +statement ok +SELECT * EXCEPT (a, b, row_num) +FROM ( + SELECT + a, + b, + ROW_NUMBER() OVER (ORDER BY b) AS row_num + FROM table1 +) + # EXCLUDE order shouldn't matter query II SELECT * EXCLUDE(b, a) From 7db4213b71ed9e914c5a4f16954abfa20b091ae3 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Thu, 25 Jul 2024 14:55:58 +0300 Subject: [PATCH 145/357] Refactor/simplify window frame utils (#11648) * Simplify window frame utils * Remove unwrap calls * Fix format * Incorporate review feedback --- .../core/tests/fuzz_cases/window_fuzz.rs | 64 ++++++++----- datafusion/expr/src/window_frame.rs | 89 +++++++++---------- .../proto/src/logical_plan/from_proto.rs | 38 ++++---- datafusion/sql/src/expr/function.rs | 19 ++-- 4 files changed, 105 insertions(+), 105 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 5bd19850cacc8..c97621ec4d019 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow::array::{ArrayRef, Int32Array}; +use arrow::array::{ArrayRef, Int32Array, StringArray}; use arrow::compute::{concat_batches, SortOptions}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -45,6 +45,7 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use test_utils::add_empty_batches; use hashbrown::HashMap; +use rand::distributions::Alphanumeric; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -607,25 +608,6 @@ fn convert_bound_to_current_row_if_applicable( } } -/// This utility determines whether a given window frame can be executed with -/// multiple ORDER BY expressions. 
As an example, range frames with offset (such -/// as `RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING`) cannot have ORDER BY clauses -/// of the form `\[ORDER BY a ASC, b ASC, ...]` -fn can_accept_multi_orderby(window_frame: &WindowFrame) -> bool { - match window_frame.units { - WindowFrameUnits::Rows => true, - WindowFrameUnits::Range => { - // Range can only accept multi ORDER BY clauses when bounds are - // CURRENT ROW or UNBOUNDED PRECEDING/FOLLOWING: - (window_frame.start_bound.is_unbounded() - || window_frame.start_bound == WindowFrameBound::CurrentRow) - && (window_frame.end_bound.is_unbounded() - || window_frame.end_bound == WindowFrameBound::CurrentRow) - } - WindowFrameUnits::Groups => true, - } -} - /// Perform batch and running window same input /// and verify outputs of `WindowAggExec` and `BoundedWindowAggExec` are equal async fn run_window_test( @@ -649,7 +631,7 @@ async fn run_window_test( options: SortOptions::default(), }) } - if orderby_exprs.len() > 1 && !can_accept_multi_orderby(&window_frame) { + if orderby_exprs.len() > 1 && !window_frame.can_accept_multi_orderby() { orderby_exprs = orderby_exprs[0..1].to_vec(); } let mut partitionby_exprs = vec![]; @@ -733,11 +715,30 @@ async fn run_window_test( )?) as _; let task_ctx = ctx.task_ctx(); let collected_usual = collect(usual_window_exec, task_ctx.clone()).await?; - let collected_running = collect(running_window_exec, task_ctx).await?; + let collected_running = collect(running_window_exec, task_ctx) + .await? + .into_iter() + .filter(|b| b.num_rows() > 0) + .collect::>(); // BoundedWindowAggExec should produce more chunk than the usual WindowAggExec. // Otherwise it means that we cannot generate result in running mode. - assert!(collected_running.len() > collected_usual.len()); + let err_msg = format!("Inconsistent result for window_frame: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, random_seed: {random_seed:?}, search_mode: {search_mode:?}, partition_by_columns:{partition_by_columns:?}, orderby_columns: {orderby_columns:?}"); + // Below check makes sure that, streaming execution generates more chunks than the bulk execution. + // Since algorithms and operators works on sliding windows in the streaming execution. + // However, in the current test setup for some random generated window frame clauses: It is not guaranteed + // for streaming execution to generate more chunk than its non-streaming counter part in the Linear mode. + // As an example window frame `OVER(PARTITION BY d ORDER BY a RANGE BETWEEN CURRENT ROW AND 9 FOLLOWING)` + // needs to receive a=10 to generate result for the rows where a=0. If the input data generated is between the range [0, 9]. + // even in streaming mode, generated result will be single bulk as in the non-streaming version. 
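The guarded assertion just below follows from the Linear-mode caveat described in the comment above. Separately, the `can_accept_multi_orderby` check this test now performs lives on `WindowFrame` itself, next to the new `regularize_order_bys` (see the `window_frame.rs` hunk later in this patch). A rough sketch of how those two methods behave for the bounded RANGE frame used elsewhere in this test, following the rules in that hunk (`ts` is an illustrative column name):

```rust
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{col, Expr, WindowFrame, WindowFrameBound, WindowFrameUnits};

fn bounded_range_frame_rules() -> Result<()> {
    // RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: not a "free range" frame.
    let frame = WindowFrame::new_bounds(
        WindowFrameUnits::Range,
        WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))),
        WindowFrameBound::Following(ScalarValue::UInt32(Some(2))),
    );
    // Offset RANGE frames cannot take multiple ORDER BY expressions ...
    assert!(!frame.can_accept_multi_orderby());

    // ... and they require exactly one ORDER BY column: an empty list is an
    // error rather than being padded with a constant sort key.
    let mut empty: Vec<Expr> = vec![];
    assert!(frame.regularize_order_bys(&mut empty).is_err());

    // A single ORDER BY column is accepted and left unchanged.
    let mut one = vec![col("ts").sort(true, true)];
    frame.regularize_order_bys(&mut one)?;
    Ok(())
}
```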
+ if search_mode != Linear { + assert!( + collected_running.len() > collected_usual.len(), + "{}", + err_msg + ); + } + // compare let usual_formatted = pretty_format_batches(&collected_usual)?.to_string(); let running_formatted = pretty_format_batches(&collected_running)?.to_string(); @@ -767,10 +768,17 @@ async fn run_window_test( Ok(()) } +fn generate_random_string(rng: &mut StdRng, length: usize) -> String { + rng.sample_iter(&Alphanumeric) + .take(length) + .map(char::from) + .collect() +} + /// Return randomly sized record batches with: /// three sorted int32 columns 'a', 'b', 'c' ranged from 0..DISTINCT as columns /// one random int32 column x -fn make_staggered_batches( +pub(crate) fn make_staggered_batches( len: usize, n_distinct: usize, random_seed: u64, @@ -779,6 +787,7 @@ fn make_staggered_batches( let mut rng = StdRng::seed_from_u64(random_seed); let mut input123: Vec<(i32, i32, i32)> = vec![(0, 0, 0); len]; let mut input4: Vec = vec![0; len]; + let mut input5: Vec = vec!["".to_string(); len]; input123.iter_mut().for_each(|v| { *v = ( rng.gen_range(0..n_distinct) as i32, @@ -788,10 +797,15 @@ fn make_staggered_batches( }); input123.sort(); rng.fill(&mut input4[..]); + input5.iter_mut().for_each(|v| { + *v = generate_random_string(&mut rng, 1); + }); + input5.sort(); let input1 = Int32Array::from_iter_values(input123.iter().map(|k| k.0)); let input2 = Int32Array::from_iter_values(input123.iter().map(|k| k.1)); let input3 = Int32Array::from_iter_values(input123.iter().map(|k| k.2)); let input4 = Int32Array::from_iter_values(input4); + let input5 = StringArray::from_iter_values(input5); // split into several record batches let mut remainder = RecordBatch::try_from_iter(vec![ @@ -799,6 +813,7 @@ fn make_staggered_batches( ("b", Arc::new(input2) as ArrayRef), ("c", Arc::new(input3) as ArrayRef), ("x", Arc::new(input4) as ArrayRef), + ("string_field", Arc::new(input5) as ArrayRef), ]) .unwrap(); @@ -807,6 +822,7 @@ fn make_staggered_batches( while remainder.num_rows() > 0 { let batch_size = rng.gen_range(0..50); if remainder.num_rows() < batch_size { + batches.push(remainder); break; } batches.push(remainder.slice(0, batch_size)); diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index c0617eaf4ed43..5b2f8982a559e 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -26,8 +26,7 @@ use std::fmt::{self, Formatter}; use std::hash::Hash; -use crate::expr::Sort; -use crate::Expr; +use crate::{lit, Expr}; use datafusion_common::{plan_err, sql_err, DataFusionError, Result, ScalarValue}; use sqlparser::ast; @@ -246,59 +245,51 @@ impl WindowFrame { causal, } } -} -/// Regularizes ORDER BY clause for window definition for implicit corner cases. -pub fn regularize_window_order_by( - frame: &WindowFrame, - order_by: &mut Vec, -) -> Result<()> { - if frame.units == WindowFrameUnits::Range && order_by.len() != 1 { - // Normally, RANGE frames require an ORDER BY clause with exactly one - // column. However, an ORDER BY clause may be absent or present but with - // more than one column in two edge cases: - // 1. start bound is UNBOUNDED or CURRENT ROW - // 2. end bound is CURRENT ROW or UNBOUNDED. - // In these cases, we regularize the ORDER BY clause if the ORDER BY clause - // is absent. If an ORDER BY clause is present but has more than one column, - // the ORDER BY clause is unchanged. Note that this follows Postgres behavior. 
- if (frame.start_bound.is_unbounded() - || frame.start_bound == WindowFrameBound::CurrentRow) - && (frame.end_bound == WindowFrameBound::CurrentRow - || frame.end_bound.is_unbounded()) - { - // If an ORDER BY clause is absent, it is equivalent to a ORDER BY clause - // with constant value as sort key. - // If an ORDER BY clause is present but has more than one column, it is - // unchanged. - if order_by.is_empty() { - order_by.push(Expr::Sort(Sort::new( - Box::new(Expr::Literal(ScalarValue::UInt64(Some(1)))), - true, - false, - ))); + /// Regularizes the ORDER BY clause of the window frame. + pub fn regularize_order_bys(&self, order_by: &mut Vec) -> Result<()> { + match self.units { + // Normally, RANGE frames require an ORDER BY clause with exactly + // one column. However, an ORDER BY clause may be absent or have + // more than one column when the start/end bounds are UNBOUNDED or + // CURRENT ROW. + WindowFrameUnits::Range if self.free_range() => { + // If an ORDER BY clause is absent, it is equivalent to an + // ORDER BY clause with constant value as sort key. If an + // ORDER BY clause is present but has more than one column, + // it is unchanged. Note that this follows PostgreSQL behavior. + if order_by.is_empty() { + order_by.push(lit(1u64).sort(true, false)); + } + } + WindowFrameUnits::Range if order_by.len() != 1 => { + return plan_err!("RANGE requires exactly one ORDER BY column"); } + WindowFrameUnits::Groups if order_by.is_empty() => { + return plan_err!("GROUPS requires an ORDER BY clause"); + } + _ => {} } + Ok(()) } - Ok(()) -} -/// Checks if given window frame is valid. In particular, if the frame is RANGE -/// with offset PRECEDING/FOLLOWING, it must have exactly one ORDER BY column. -pub fn check_window_frame(frame: &WindowFrame, order_bys: usize) -> Result<()> { - if frame.units == WindowFrameUnits::Range && order_bys != 1 { - // See `regularize_window_order_by`. - if !(frame.start_bound.is_unbounded() - || frame.start_bound == WindowFrameBound::CurrentRow) - || !(frame.end_bound == WindowFrameBound::CurrentRow - || frame.end_bound.is_unbounded()) - { - plan_err!("RANGE requires exactly one ORDER BY column")? + /// Returns whether the window frame can accept multiple ORDER BY expressons. + pub fn can_accept_multi_orderby(&self) -> bool { + match self.units { + WindowFrameUnits::Rows => true, + WindowFrameUnits::Range => self.free_range(), + WindowFrameUnits::Groups => true, } - } else if frame.units == WindowFrameUnits::Groups && order_bys == 0 { - plan_err!("GROUPS requires an ORDER BY clause")? - }; - Ok(()) + } + + /// Returns whether the window frame is "free range"; i.e. its start/end + /// bounds are UNBOUNDED or CURRENT ROW. 
+ fn free_range(&self) -> bool { + (self.start_bound.is_unbounded() + || self.start_bound == WindowFrameBound::CurrentRow) + && (self.end_bound.is_unbounded() + || self.end_bound == WindowFrameBound::CurrentRow) + } } /// There are five ways to describe starting and ending frame boundaries: diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 7b717add3311a..5e9b9af49ae9c 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -19,18 +19,14 @@ use std::sync::Arc; use datafusion::execution::registry::FunctionRegistry; use datafusion_common::{ - internal_err, plan_datafusion_err, DataFusionError, Result, ScalarValue, + exec_datafusion_err, internal_err, plan_datafusion_err, Result, ScalarValue, TableReference, UnnestOptions, }; -use datafusion_expr::expr::Unnest; -use datafusion_expr::expr::{Alias, Placeholder}; -use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; -use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ - expr::{self, InList, Sort, WindowFunction}, + expr::{self, Alias, InList, Placeholder, Sort, Unnest, WindowFunction}, logical_plan::{PlanType, StringifiedPlan}, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, Case, Cast, Expr, - GroupingSet, + ExprFunctionExt, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -289,24 +285,22 @@ pub fn parse_expr( .window_frame .as_ref() .map::, _>(|window_frame| { - let window_frame = window_frame.clone().try_into()?; - check_window_frame(&window_frame, order_by.len()) + let window_frame: WindowFrame = window_frame.clone().try_into()?; + window_frame + .regularize_order_bys(&mut order_by) .map(|_| window_frame) }) .transpose()? .ok_or_else(|| { - DataFusionError::Execution( - "missing window frame during deserialization".to_string(), - ) + exec_datafusion_err!("missing window frame during deserialization") })?; - // TODO: support proto for null treatment - regularize_window_order_by(&window_frame, &mut order_by)?; + // TODO: support proto for null treatment match window_function { window_expr_node::WindowFunction::AggrFunction(i) => { let aggr_function = parse_i32_to_aggregate_function(i)?; - Ok(Expr::WindowFunction(WindowFunction::new( + Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::AggregateFunction(aggr_function), vec![parse_required_expr( expr.expr.as_deref(), @@ -319,7 +313,7 @@ pub fn parse_expr( .order_by(order_by) .window_frame(window_frame) .build() - .unwrap()) + .map_err(Error::DataFusionError) } window_expr_node::WindowFunction::BuiltInFunction(i) => { let built_in_function = protobuf::BuiltInWindowFunction::try_from(*i) @@ -331,7 +325,7 @@ pub fn parse_expr( .map(|e| vec![e]) .unwrap_or_else(Vec::new); - Ok(Expr::WindowFunction(WindowFunction::new( + Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::BuiltInWindowFunction( built_in_function, ), @@ -341,7 +335,7 @@ pub fn parse_expr( .order_by(order_by) .window_frame(window_frame) .build() - .unwrap()) + .map_err(Error::DataFusionError) } window_expr_node::WindowFunction::Udaf(udaf_name) => { let udaf_function = match &expr.fun_definition { @@ -353,7 +347,7 @@ pub fn parse_expr( parse_optional_expr(expr.expr.as_deref(), registry, codec)? 
.map(|e| vec![e]) .unwrap_or_else(Vec::new); - Ok(Expr::WindowFunction(WindowFunction::new( + Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::AggregateUDF(udaf_function), args, )) @@ -361,7 +355,7 @@ pub fn parse_expr( .order_by(order_by) .window_frame(window_frame) .build() - .unwrap()) + .map_err(Error::DataFusionError) } window_expr_node::WindowFunction::Udwf(udwf_name) => { let udwf_function = match &expr.fun_definition { @@ -373,7 +367,7 @@ pub fn parse_expr( parse_optional_expr(expr.expr.as_deref(), registry, codec)? .map(|e| vec![e]) .unwrap_or_else(Vec::new); - Ok(Expr::WindowFunction(WindowFunction::new( + Expr::WindowFunction(WindowFunction::new( expr::WindowFunctionDefinition::WindowUDF(udwf_function), args, )) @@ -381,7 +375,7 @@ pub fn parse_expr( .order_by(order_by) .window_frame(window_frame) .build() - .unwrap()) + .map_err(Error::DataFusionError) } } } diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index fd759c1613814..2506ef740fded 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. +use std::str::FromStr; + use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; + use arrow_schema::DataType; use datafusion_common::{ internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; use datafusion_expr::planner::PlannerResult; -use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ expr, AggregateFunction, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, @@ -36,7 +38,7 @@ use sqlparser::ast::{ FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, NullTreatment, ObjectName, OrderByExpr, WindowType, }; -use std::str::FromStr; + use strum::IntoEnumIterator; /// Suggest a valid function based on an invalid input function name @@ -306,14 +308,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .window_frame .as_ref() .map(|window_frame| { - let window_frame = window_frame.clone().try_into()?; - check_window_frame(&window_frame, order_by.len()) + let window_frame: WindowFrame = window_frame.clone().try_into()?; + window_frame + .regularize_order_bys(&mut order_by) .map(|_| window_frame) }) .transpose()?; let window_frame = if let Some(window_frame) = window_frame { - regularize_window_order_by(&window_frame, &mut order_by)?; window_frame } else if let Some(is_ordering_strict) = is_ordering_strict { WindowFrame::new(Some(is_ordering_strict)) @@ -322,7 +324,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; if let Ok(fun) = self.find_window_func(&name) { - let expr = match fun { + return match fun { WindowFunctionDefinition::AggregateFunction(aggregate_fun) => { let args = self.function_args_to_expr(args, schema, planner_context)?; @@ -336,7 +338,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .window_frame(window_frame) .null_treatment(null_treatment) .build() - .unwrap() } _ => Expr::WindowFunction(expr::WindowFunction::new( fun, @@ -346,10 +347,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .order_by(order_by) .window_frame(window_frame) .null_treatment(null_treatment) - .build() - .unwrap(), + .build(), }; - return Ok(expr); } } else { // User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function From 71903e1b2c62cda9a92808a71f8b63bcdd43762d Mon Sep 17 
00:00:00 2001 From: Andrew Lamb Date: Thu, 25 Jul 2024 14:04:33 -0400 Subject: [PATCH 146/357] Minor: use `ready!` macro to simplify FilterExec poll loop (#11649) --- datafusion/physical-plan/src/filter.rs | 35 +++++++++++--------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index a9d78d059f5c1..67de0989649e6 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -18,7 +18,7 @@ use std::any::Any; use std::pin::Pin; use std::sync::Arc; -use std::task::{Context, Poll}; +use std::task::{ready, Context, Poll}; use super::{ ColumnStatistics, DisplayAs, ExecutionPlanProperties, PlanProperties, @@ -59,6 +59,7 @@ pub struct FilterExec { metrics: ExecutionPlanMetricsSet, /// Selectivity for statistics. 0 = no rows, 100 = all rows default_selectivity: u8, + /// Properties equivalence properties, partitioning, etc. cache: PlanProperties, } @@ -375,26 +376,20 @@ impl Stream for FilterExecStream { ) -> Poll> { let poll; loop { - match self.input.poll_next_unpin(cx) { - Poll::Ready(value) => match value { - Some(Ok(batch)) => { - let timer = self.baseline_metrics.elapsed_compute().timer(); - let filtered_batch = batch_filter(&batch, &self.predicate)?; - // skip entirely filtered batches - if filtered_batch.num_rows() == 0 { - continue; - } - timer.done(); - poll = Poll::Ready(Some(Ok(filtered_batch))); - break; + match ready!(self.input.poll_next_unpin(cx)) { + Some(Ok(batch)) => { + let timer = self.baseline_metrics.elapsed_compute().timer(); + let filtered_batch = batch_filter(&batch, &self.predicate)?; + // skip entirely filtered batches + if filtered_batch.num_rows() == 0 { + continue; } - _ => { - poll = Poll::Ready(value); - break; - } - }, - Poll::Pending => { - poll = Poll::Pending; + timer.done(); + poll = Poll::Ready(Some(Ok(filtered_batch))); + break; + } + value => { + poll = Poll::Ready(value); break; } } From 5b7e0aae22c2d5a5be19e4b877e878199b9f6c43 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 25 Jul 2024 21:42:37 +0200 Subject: [PATCH 147/357] Temporarily pin toolchain version to avoid clippy (#11655) Temporarily pin toolchain version until we solve problems reported by newer clippy release. --- rust-toolchain.toml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000000000..0ef859ae8db3b --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[toolchain] +# Temporarily pin toolchain version until we solve problems reported by newer clippy release. 
+channel = "1.79.0" \ No newline at end of file From 6fd57b2e385e841f04a6b0bb05f259bc82c4c48c Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 25 Jul 2024 22:03:04 +0200 Subject: [PATCH 148/357] Fix clippy errors for Rust 1.80 (#11654) * Fix some new warnings Signed-off-by: Nick Cameron * Fix formatting errors reported by clippy Newest version of clippy complains when list item continuation is not indented. * Remove redundant reference Reported by clippy * Suppress non-trivial clippy suggestion To be revisited --------- Signed-off-by: Nick Cameron Co-authored-by: Nick Cameron --- Cargo.toml | 1 + datafusion/common/Cargo.toml | 1 + datafusion/common/src/config.rs | 2 +- datafusion/common/src/hash_utils.rs | 20 +++++++++- datafusion/common/src/tree_node.rs | 8 ++-- datafusion/common/src/utils/memory.rs | 2 +- datafusion/common/src/utils/mod.rs | 2 +- datafusion/core/Cargo.toml | 2 +- datafusion/core/benches/sort.rs | 2 +- datafusion/core/src/catalog/mod.rs | 4 +- datafusion/core/src/dataframe/mod.rs | 6 +-- .../core/src/datasource/listing/helpers.rs | 2 +- .../core/src/datasource/listing/table.rs | 14 +++---- .../datasource/physical_plan/parquet/mod.rs | 40 +++++++++---------- .../physical_plan/parquet/statistics.rs | 14 +++---- datafusion/core/src/execution/context/mod.rs | 33 +++++++-------- .../core/src/execution/session_state.rs | 4 +- datafusion/core/src/lib.rs | 34 ++++++++-------- .../aggregate_statistics.rs | 1 + .../enforce_distribution.rs | 1 + .../src/physical_optimizer/enforce_sorting.rs | 1 + .../core/src/physical_optimizer/pruning.rs | 14 +++---- datafusion/execution/src/memory_pool/mod.rs | 4 +- datafusion/execution/src/object_store.rs | 16 ++++---- datafusion/expr/src/accumulator.rs | 8 ++-- datafusion/expr/src/groups_accumulator.rs | 6 +-- datafusion/expr/src/interval_arithmetic.rs | 3 +- datafusion/expr/src/logical_plan/plan.rs | 18 ++++----- datafusion/expr/src/simplify.rs | 2 +- datafusion/expr/src/udaf.rs | 6 +-- datafusion/expr/src/udf.rs | 2 +- datafusion/expr/src/udwf.rs | 4 +- datafusion/expr/src/window_frame.rs | 10 ++--- datafusion/functions-aggregate/src/lib.rs | 2 +- datafusion/functions/src/lib.rs | 2 +- datafusion/optimizer/src/analyzer/subquery.rs | 4 +- .../optimizer/src/common_subexpr_eliminate.rs | 2 +- datafusion/optimizer/src/lib.rs | 6 +-- datafusion/optimizer/src/push_down_filter.rs | 2 +- .../src/simplify_expressions/guarantees.rs | 2 +- .../src/simplify_expressions/regex.rs | 1 + .../src/aggregate/utils.rs | 2 +- .../physical-expr-common/src/binary_map.rs | 20 +++++----- .../physical-expr-common/src/physical_expr.rs | 2 +- .../src/equivalence/properties.rs | 22 +++++----- .../physical-expr/src/utils/guarantee.rs | 13 +++--- datafusion/physical-plan/Cargo.toml | 3 ++ .../physical-plan/src/aggregates/order/mod.rs | 2 +- .../physical-plan/src/joins/hash_join.rs | 17 +++++--- .../src/joins/symmetric_hash_join.rs | 4 +- datafusion/physical-plan/src/sorts/sort.rs | 8 ++-- datafusion/physical-plan/src/unnest.rs | 3 +- datafusion/physical-plan/src/windows/mod.rs | 1 + datafusion/sql/src/lib.rs | 6 +-- datafusion/sql/src/parser.rs | 4 +- datafusion/sql/src/utils.rs | 1 + .../sqllogictest/test_files/parquet.slt | 37 ++++++++--------- .../test_files/sort_merge_join.slt | 19 ++++----- .../substrait/src/logical_plan/consumer.rs | 1 + 59 files changed, 257 insertions(+), 216 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cb27a8761a8e4..9e7971bdc1e8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -154,4 +154,5 @@ rpath = false large_futures = "warn" 
[workspace.lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_imports = "deny" diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 62ea85a4a33d7..85dfb2e8f73ab 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -39,6 +39,7 @@ path = "src/lib.rs" avro = ["apache-avro"] backtrace = [] pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] +force_hash_collisions = [] [dependencies] ahash = { workspace = true } diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 6e007ded03888..1f20bd255027a 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1207,7 +1207,7 @@ impl ConfigField for TableOptions { /// # Parameters /// /// * `key`: The configuration key specifying which setting to adjust, prefixed with the format (e.g., "format.delimiter") - /// for CSV format. + /// for CSV format. /// * `value`: The value to set for the specified configuration key. /// /// # Returns diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 010221b0485f9..5e1324e80702c 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -17,22 +17,27 @@ //! Functionality used both on logical and physical plans +#[cfg(not(feature = "force_hash_collisions"))] use std::sync::Arc; use ahash::RandomState; use arrow::array::*; use arrow::datatypes::*; use arrow::row::Rows; +#[cfg(not(feature = "force_hash_collisions"))] use arrow::{downcast_dictionary_array, downcast_primitive_array}; use arrow_buffer::IntervalDayTime; use arrow_buffer::IntervalMonthDayNano; +#[cfg(not(feature = "force_hash_collisions"))] use crate::cast::{ as_boolean_array, as_fixed_size_list_array, as_generic_binary_array, as_large_list_array, as_list_array, as_map_array, as_primitive_array, as_string_array, as_struct_array, }; -use crate::error::{Result, _internal_err}; +use crate::error::Result; +#[cfg(not(feature = "force_hash_collisions"))] +use crate::error::_internal_err; // Combines two hashes into one hash #[inline] @@ -41,6 +46,7 @@ pub fn combine_hashes(l: u64, r: u64) -> u64 { hash.wrapping_mul(37).wrapping_add(r) } +#[cfg(not(feature = "force_hash_collisions"))] fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) { if mul_col { hashes_buffer.iter_mut().for_each(|hash| { @@ -90,6 +96,7 @@ hash_float_value!((half::f16, u16), (f32, u32), (f64, u64)); /// Builds hash values of PrimitiveArray and writes them into `hashes_buffer` /// If `rehash==true` this combines the previous hash value in the buffer /// with the new hash using `combine_hashes` +#[cfg(not(feature = "force_hash_collisions"))] fn hash_array_primitive( array: &PrimitiveArray, random_state: &RandomState, @@ -135,6 +142,7 @@ fn hash_array_primitive( /// Hashes one array into the `hashes_buffer` /// If `rehash==true` this combines the previous hash value in the buffer /// with the new hash using `combine_hashes` +#[cfg(not(feature = "force_hash_collisions"))] fn hash_array( array: T, random_state: &RandomState, @@ -180,6 +188,7 @@ fn hash_array( } /// Hash the values in a dictionary array +#[cfg(not(feature = "force_hash_collisions"))] fn hash_dictionary( array: &DictionaryArray, random_state: &RandomState, @@ -210,6 +219,7 @@ fn hash_dictionary( Ok(()) } +#[cfg(not(feature = "force_hash_collisions"))] fn hash_struct_array( array: &StructArray, random_state: &RandomState, @@ -270,6 +280,7 @@ fn hash_map_array( Ok(()) } +#[cfg(not(feature 
= "force_hash_collisions"))] fn hash_list_array( array: &GenericListArray, random_state: &RandomState, @@ -303,6 +314,7 @@ where Ok(()) } +#[cfg(not(feature = "force_hash_collisions"))] fn hash_fixed_list_array( array: &FixedSizeListArray, random_state: &RandomState, @@ -488,7 +500,11 @@ pub fn create_row_hashes_v2<'a>( #[cfg(test)] mod tests { - use arrow::{array::*, datatypes::*}; + use std::sync::Arc; + + use arrow::array::*; + #[cfg(not(feature = "force_hash_collisions"))] + use arrow::datatypes::*; use super::*; diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index bb54f4e13af98..bcf4d7664acc2 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -43,14 +43,14 @@ macro_rules! handle_transform_recursion { /// There are three categories of TreeNode APIs: /// /// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`: -/// [`apply`], [`visit`], [`exists`]. +/// [`apply`], [`visit`], [`exists`]. /// /// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s -/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`], -/// [`transform_down`], [`transform_down_up`], and [`rewrite`]. +/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`], +/// [`transform_down`], [`transform_down_up`], and [`rewrite`]. /// /// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`], -/// and [`map_children`]. +/// and [`map_children`]. /// /// | Traversal Order | Inspecting | Transforming | /// | --- | --- | --- | diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index 17668cf93d99b..2c34b61bd0930 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -24,7 +24,7 @@ use crate::{DataFusionError, Result}; /// # Parameters /// - `num_elements`: The number of elements expected in the hash table. /// - `fixed_size`: A fixed overhead size associated with the collection -/// (e.g., HashSet or HashTable). +/// (e.g., HashSet or HashTable). /// - `T`: The type of elements stored in the hash table. /// /// # Details diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 8264b48725929..8b025255f5df7 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -335,7 +335,7 @@ pub fn get_at_indices>( /// This function finds the longest prefix of the form 0, 1, 2, ... within the /// collection `sequence`. Examples: /// - For 0, 1, 2, 4, 5; we would produce 3, meaning 0, 1, 2 is the longest satisfying -/// prefix. +/// prefix. /// - For 1, 2, 3, 4; we would produce 0, meaning there is no such prefix. 
pub fn longest_consecutive_prefix>( sequence: impl IntoIterator, diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index bed9265ff0163..98d501794f775 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -62,7 +62,7 @@ default = [ ] encoding_expressions = ["datafusion-functions/encoding_expressions"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) -force_hash_collisions = [] +force_hash_collisions = ["datafusion-physical-plan/force_hash_collisions", "datafusion-common/force_hash_collisions"] math_expressions = ["datafusion-functions/math_expressions"] parquet = ["datafusion-common/parquet", "dep:parquet"] pyarrow = ["datafusion-common/pyarrow", "parquet"] diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 94a39bbb2af31..99a74b61b3e0a 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -21,7 +21,7 @@ //! 1. Creates a list of tuples (sorted if necessary) //! //! 2. Divides those tuples across some number of streams of [`RecordBatch`] -//! preserving any ordering +//! preserving any ordering //! //! 3. Times how long it takes for a given sort plan to process the input //! diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index 531adc4b210cf..fc50b4214d6dc 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -141,12 +141,12 @@ pub trait CatalogList: CatalogProviderList {} /// Here are some examples of how to implement custom catalogs: /// /// * [`datafusion-cli`]: [`DynamicFileCatalogProvider`] catalog provider -/// that treats files and directories on a filesystem as tables. +/// that treats files and directories on a filesystem as tables. /// /// * The [`catalog.rs`]: a simple directory based catalog. /// /// * [delta-rs]: [`UnityCatalogProvider`] implementation that can -/// read from Delta Lake tables +/// read from Delta Lake tables /// /// [`datafusion-cli`]: https://datafusion.apache.org/user-guide/cli/index.html /// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index ea437cc99a336..e1021d06261f2 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -114,15 +114,15 @@ impl Default for DataFrameWriteOptions { /// The typical workflow using DataFrames looks like /// /// 1. Create a DataFrame via methods on [SessionContext], such as [`read_csv`] -/// and [`read_parquet`]. +/// and [`read_parquet`]. /// /// 2. Build a desired calculation by calling methods such as [`filter`], -/// [`select`], [`aggregate`], and [`limit`] +/// [`select`], [`aggregate`], and [`limit`] /// /// 3. Execute into [`RecordBatch`]es by calling [`collect`] /// /// A `DataFrame` is a wrapper around a [`LogicalPlan`] and the [`SessionState`] -/// required for execution. +/// required for execution. /// /// DataFrames are "lazy" in the sense that most methods do not actually compute /// anything, they just build up a plan. 
Calling [`collect`] executes the plan diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index bfc33ce0bd73f..29b593a70ca06 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -49,7 +49,7 @@ use object_store::{ObjectMeta, ObjectStore}; /// This means that if this function returns true: /// - the table provider can filter the table partition values with this expression /// - the expression can be marked as `TableProviderFilterPushDown::Exact` once this filtering -/// was performed +/// was performed pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { let mut is_applicable = true; expr.apply(|expr| { diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4d0a7738b0392..b91a4bd09c550 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -287,17 +287,17 @@ impl ListingOptions { ///# Notes /// /// - If only one level (e.g. `year` in the example above) is - /// specified, the other levels are ignored but the files are - /// still read. + /// specified, the other levels are ignored but the files are + /// still read. /// /// - Files that don't follow this partitioning scheme will be - /// ignored. + /// ignored. /// /// - Since the columns have the same value for all rows read from - /// each individual file (such as dates), they are typically - /// dictionary encoded for efficiency. You may use - /// [`wrap_partition_type_in_dict`] to request a - /// dictionary-encoded type. + /// each individual file (such as dates), they are typically + /// dictionary encoded for efficiency. You may use + /// [`wrap_partition_type_in_dict`] to request a + /// dictionary-encoded type. /// /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. /// diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 1eea4eab8ba20..7f764059218cc 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -119,32 +119,32 @@ pub use writer::plan_to_parquet; /// Supports the following optimizations: /// /// * Concurrent reads: Can read from one or more files in parallel as multiple -/// partitions, including concurrently reading multiple row groups from a single -/// file. +/// partitions, including concurrently reading multiple row groups from a single +/// file. /// /// * Predicate push down: skips row groups and pages based on -/// min/max/null_counts in the row group metadata, the page index and bloom -/// filters. +/// min/max/null_counts in the row group metadata, the page index and bloom +/// filters. /// /// * Projection pushdown: reads and decodes only the columns required. /// /// * Limit pushdown: stop execution early after some number of rows are read. /// /// * Custom readers: customize reading parquet files, e.g. to cache metadata, -/// coalesce I/O operations, etc. See [`ParquetFileReaderFactory`] for more -/// details. +/// coalesce I/O operations, etc. See [`ParquetFileReaderFactory`] for more +/// details. /// /// * Schema adapters: read parquet files with different schemas into a unified -/// table schema. This can be used to implement "schema evolution". 
See -/// [`SchemaAdapterFactory`] for more details. +/// table schema. This can be used to implement "schema evolution". See +/// [`SchemaAdapterFactory`] for more details. /// /// * metadata_size_hint: controls the number of bytes read from the end of the -/// file in the initial I/O when the default [`ParquetFileReaderFactory`]. If a -/// custom reader is used, it supplies the metadata directly and this parameter -/// is ignored. [`ParquetExecBuilder::with_metadata_size_hint`] for more details. +/// file in the initial I/O when the default [`ParquetFileReaderFactory`]. If a +/// custom reader is used, it supplies the metadata directly and this parameter +/// is ignored. [`ParquetExecBuilder::with_metadata_size_hint`] for more details. /// /// * User provided [`ParquetAccessPlan`]s to skip row groups and/or pages -/// based on external information. See "Implementing External Indexes" below +/// based on external information. See "Implementing External Indexes" below /// /// # Implementing External Indexes /// @@ -191,22 +191,22 @@ pub use writer::plan_to_parquet; /// # Execution Overview /// /// * Step 1: [`ParquetExec::execute`] is called, returning a [`FileStream`] -/// configured to open parquet files with a [`ParquetOpener`]. +/// configured to open parquet files with a [`ParquetOpener`]. /// /// * Step 2: When the stream is polled, the [`ParquetOpener`] is called to open -/// the file. +/// the file. /// /// * Step 3: The `ParquetOpener` gets the [`ParquetMetaData`] (file metadata) -/// via [`ParquetFileReaderFactory`], creating a [`ParquetAccessPlan`] by -/// applying predicates to metadata. The plan and projections are used to -/// determine what pages must be read. +/// via [`ParquetFileReaderFactory`], creating a [`ParquetAccessPlan`] by +/// applying predicates to metadata. The plan and projections are used to +/// determine what pages must be read. /// /// * Step 4: The stream begins reading data, fetching the required pages -/// and incrementally decoding them. +/// and incrementally decoding them. /// /// * Step 5: As each [`RecordBatch]` is read, it may be adapted by a -/// [`SchemaAdapter`] to match the table schema. By default missing columns are -/// filled with nulls, but this can be customized via [`SchemaAdapterFactory`]. +/// [`SchemaAdapter`] to match the table schema. By default missing columns are +/// filled with nulls, but this can be customized via [`SchemaAdapterFactory`]. /// /// [`RecordBatch`]: arrow::record_batch::RecordBatch /// [`SchemaAdapter`]: crate::datasource::schema_adapter::SchemaAdapter diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 3d250718f736c..11b8f5fc6c79a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -1358,14 +1358,14 @@ impl<'a> StatisticsConverter<'a> { /// # Parameters: /// /// * `column_page_index`: The parquet column page indices, read from - /// `ParquetMetaData` column_index + /// `ParquetMetaData` column_index /// /// * `column_offset_index`: The parquet column offset indices, read from - /// `ParquetMetaData` offset_index + /// `ParquetMetaData` offset_index /// /// * `row_group_indices`: The indices of the row groups, that are used to - /// extract the column page index and offset index on a per row group - /// per column basis. 
+ /// extract the column page index and offset index on a per row group + /// per column basis. /// /// # Return Value /// @@ -1486,13 +1486,13 @@ impl<'a> StatisticsConverter<'a> { /// # Parameters: /// /// * `column_offset_index`: The parquet column offset indices, read from - /// `ParquetMetaData` offset_index + /// `ParquetMetaData` offset_index /// /// * `row_group_metadatas`: The metadata slice of the row groups, read - /// from `ParquetMetaData` row_groups + /// from `ParquetMetaData` row_groups /// /// * `row_group_indices`: The indices of the row groups, that are used to - /// extract the column offset index on a per row group per column basis. + /// extract the column offset index on a per row group per column basis. /// /// See docs on [`Self::data_page_mins`] for details. pub fn data_page_row_counts( diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index ac48788edb197..18db4dc8eb0a1 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -205,21 +205,21 @@ where /// The objects are: /// /// 1. [`SessionContext`]: Most users should use a `SessionContext`. It contains -/// all information required to execute queries including high level APIs such -/// as [`SessionContext::sql`]. All queries run with the same `SessionContext` -/// share the same configuration and resources (e.g. memory limits). +/// all information required to execute queries including high level APIs such +/// as [`SessionContext::sql`]. All queries run with the same `SessionContext` +/// share the same configuration and resources (e.g. memory limits). /// /// 2. [`SessionState`]: contains information required to plan and execute an -/// individual query (e.g. creating a [`LogicalPlan`] or [`ExecutionPlan`]). -/// Each query is planned and executed using its own `SessionState`, which can -/// be created with [`SessionContext::state`]. `SessionState` allows finer -/// grained control over query execution, for example disallowing DDL operations -/// such as `CREATE TABLE`. +/// individual query (e.g. creating a [`LogicalPlan`] or [`ExecutionPlan`]). +/// Each query is planned and executed using its own `SessionState`, which can +/// be created with [`SessionContext::state`]. `SessionState` allows finer +/// grained control over query execution, for example disallowing DDL operations +/// such as `CREATE TABLE`. /// /// 3. [`TaskContext`] contains the state required for query execution (e.g. -/// [`ExecutionPlan::execute`]). It contains a subset of information in -/// [`SessionState`]. `TaskContext` allows executing [`ExecutionPlan`]s -/// [`PhysicalExpr`]s without requiring a full [`SessionState`]. +/// [`ExecutionPlan::execute`]). It contains a subset of information in +/// [`SessionState`]. `TaskContext` allows executing [`ExecutionPlan`]s +/// [`PhysicalExpr`]s without requiring a full [`SessionState`]. /// /// [`PhysicalExpr`]: crate::physical_expr::PhysicalExpr #[derive(Clone)] @@ -578,8 +578,8 @@ impl SessionContext { /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type /// coercion and function rewrites. /// - /// Note: The expression is not [simplified] or otherwise optimized: `a = 1 - /// + 2` will not be simplified to `a = 3` as this is a more involved process. + /// Note: The expression is not [simplified] or otherwise optimized: + /// `a = 1 + 2` will not be simplified to `a = 3` as this is a more involved process. /// See the [expr_api] example for how to simplify expressions. 
/// /// # Example @@ -980,6 +980,7 @@ impl SessionContext { /// /// - `SELECT MY_FUNC(x)...` will look for a function named `"my_func"` /// - `SELECT "my_FUNC"(x)` will look for a function named `"my_FUNC"` + /// /// Any functions registered with the udf name or its aliases will be overwritten with this new function pub fn register_udf(&self, f: ScalarUDF) { let mut state = self.state.write(); @@ -1324,11 +1325,11 @@ impl SessionContext { /// Notes: /// /// 1. `query_execution_start_time` is set to the current time for the - /// returned state. + /// returned state. /// /// 2. The returned state is not shared with the current session state - /// and this changes to the returned `SessionState` such as changing - /// [`ConfigOptions`] will not be reflected in this `SessionContext`. + /// and this changes to the returned `SessionState` such as changing + /// [`ConfigOptions`] will not be reflected in this `SessionContext`. /// /// [`ConfigOptions`]: crate::config::ConfigOptions pub fn state(&self) -> SessionState { diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 59cc620dae4d0..515888519fce9 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -657,8 +657,8 @@ impl SessionState { /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type /// coercion, and function rewrites. /// - /// Note: The expression is not [simplified] or otherwise optimized: `a = 1 - /// + 2` will not be simplified to `a = 3` as this is a more involved process. + /// Note: The expression is not [simplified] or otherwise optimized: + /// `a = 1 + 2` will not be simplified to `a = 3` as this is a more involved process. /// See the [expr_api] example for how to simplify expressions. /// /// # See Also: diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index d9ab9e1c07dd8..59a1106462766 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -145,13 +145,13 @@ //! DataFusion's Architecture Goals are: //! //! 1. Work “out of the box”: Provide a very fast, world class query engine with -//! minimal setup or required configuration. +//! minimal setup or required configuration. //! //! 2. Customizable everything: All behavior should be customizable by -//! implementing traits. +//! implementing traits. //! //! 3. Architecturally boring 🥱: Follow industrial best practice rather than -//! trying cutting edge, but unproven, techniques. +//! trying cutting edge, but unproven, techniques. //! //! With these principles, users start with a basic, high-performance engine //! and specialize it over time to suit their needs and available engineering @@ -219,11 +219,11 @@ //! ``` //! //! 1. The query string is parsed to an Abstract Syntax Tree (AST) -//! [`Statement`] using [sqlparser]. +//! [`Statement`] using [sqlparser]. //! //! 2. The AST is converted to a [`LogicalPlan`] and logical -//! expressions [`Expr`]s to compute the desired result by the -//! [`SqlToRel`] planner. +//! expressions [`Expr`]s to compute the desired result by the +//! [`SqlToRel`] planner. //! //! [`Statement`]: https://docs.rs/sqlparser/latest/sqlparser/ast/enum.Statement.html //! @@ -255,17 +255,17 @@ //! optimizing, in the following manner: //! //! 1. The [`LogicalPlan`] is checked and rewritten to enforce -//! semantic rules, such as type coercion, by [`AnalyzerRule`]s +//! semantic rules, such as type coercion, by [`AnalyzerRule`]s //! //! 2. 
The [`LogicalPlan`] is rewritten by [`OptimizerRule`]s, such as -//! projection and filter pushdown, to improve its efficiency. +//! projection and filter pushdown, to improve its efficiency. //! //! 3. The [`LogicalPlan`] is converted to an [`ExecutionPlan`] by a -//! [`PhysicalPlanner`] +//! [`PhysicalPlanner`] //! //! 4. The [`ExecutionPlan`] is rewritten by -//! [`PhysicalOptimizerRule`]s, such as sort and join selection, to -//! improve its efficiency. +//! [`PhysicalOptimizerRule`]s, such as sort and join selection, to +//! improve its efficiency. //! //! ## Data Sources //! @@ -291,9 +291,9 @@ //! an [`ExecutionPlan`]s for execution. //! //! 1. [`ListingTable`]: Reads data from Parquet, JSON, CSV, or AVRO -//! files. Supports single files or multiple files with HIVE style -//! partitioning, optional compression, directly reading from remote -//! object store and more. +//! files. Supports single files or multiple files with HIVE style +//! partitioning, optional compression, directly reading from remote +//! object store and more. //! //! 2. [`MemTable`]: Reads data from in memory [`RecordBatch`]es. //! @@ -425,13 +425,13 @@ //! structures: //! //! 1. [`SessionContext`]: State needed for create [`LogicalPlan`]s such -//! as the table definitions, and the function registries. +//! as the table definitions, and the function registries. //! //! 2. [`TaskContext`]: State needed for execution such as the -//! [`MemoryPool`], [`DiskManager`], and [`ObjectStoreRegistry`]. +//! [`MemoryPool`], [`DiskManager`], and [`ObjectStoreRegistry`]. //! //! 3. [`ExecutionProps`]: Per-execution properties and data (such as -//! starting timestamps, etc). +//! starting timestamps, etc). //! //! [`SessionContext`]: crate::execution::context::SessionContext //! [`TaskContext`]: crate::execution::context::TaskContext diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 5f08e4512b3a0..a8332d1d55e46 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -103,6 +103,7 @@ impl PhysicalOptimizerRule for AggregateStatistics { /// assert if the node passed as argument is a final `AggregateExec` node that can be optimized: /// - its child (with possible intermediate layers) is a partial `AggregateExec` node /// - they both have no grouping expression +/// /// If this is the case, return a ref to the partial `AggregateExec`, else `None`. /// We would have preferred to return a casted ref to AggregateExec but the recursion requires /// the `ExecutionPlan.children()` method that returns an owned reference. diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 62ac9089e2b4d..acca2ed8d997c 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -856,6 +856,7 @@ fn add_roundrobin_on_top( /// Adds a hash repartition operator: /// - to increase parallelism, and/or /// - to satisfy requirements of the subsequent operators. +/// /// Repartition(Hash) is added on top of operator `input`. 
/// /// # Arguments diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index e577c5336086a..cf9d33252ad9d 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -20,6 +20,7 @@ //! - Adds a [`SortExec`] when a requirement is not met, //! - Removes an already-existing [`SortExec`] if it is possible to prove //! that this sort is unnecessary +//! //! The rule can work on valid *and* invalid physical plans with respect to //! sorting requirements, but always produces a valid physical plan in this sense. //! diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 3c18e53497fd1..0ef390fff45c7 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -173,10 +173,10 @@ pub trait PruningStatistics { /// 1. Arbitrary expressions (including user defined functions) /// /// 2. Vectorized evaluation (provide more than one set of statistics at a time) -/// so it is suitable for pruning 1000s of containers. +/// so it is suitable for pruning 1000s of containers. /// /// 3. Any source of information that implements the [`PruningStatistics`] trait -/// (not just Parquet metadata). +/// (not just Parquet metadata). /// /// # Example /// @@ -278,17 +278,17 @@ pub trait PruningStatistics { /// 2. A predicate (expression that evaluates to a boolean) /// /// 3. [`PruningStatistics`] that provides information about columns in that -/// schema, for multiple “containers”. For each column in each container, it -/// provides optional information on contained values, min_values, max_values, -/// null_counts counts, and row_counts counts. +/// schema, for multiple “containers”. For each column in each container, it +/// provides optional information on contained values, min_values, max_values, +/// null_counts counts, and row_counts counts. /// /// **Outputs**: /// A (non null) boolean value for each container: /// * `true`: There MAY be rows that match the predicate /// /// * `false`: There are no rows that could possibly match the predicate (the -/// predicate can never possibly be true). The container can be pruned (skipped) -/// entirely. +/// predicate can never possibly be true). The container can be pruned (skipped) +/// entirely. 
/// /// Note that in order to be correct, `PruningPredicate` must return false /// **only** if it can determine that for all rows in the container, the diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index 92ed1b2918de0..3df212d466c9f 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -78,10 +78,10 @@ pub use pool::*; /// * [`UnboundedMemoryPool`]: no memory limits (the default) /// /// * [`GreedyMemoryPool`]: Limits memory usage to a fixed size using a "first -/// come first served" policy +/// come first served" policy /// /// * [`FairSpillPool`]: Limits memory usage to a fixed size, allocating memory -/// to all spilling operators fairly +/// to all spilling operators fairly pub trait MemoryPool: Send + Sync + std::fmt::Debug { /// Registers a new [`MemoryConsumer`] /// diff --git a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs index 9e1d94b346eb4..cd75c9f3c49ee 100644 --- a/datafusion/execution/src/object_store.rs +++ b/datafusion/execution/src/object_store.rs @@ -110,11 +110,11 @@ impl std::fmt::Display for ObjectStoreUrl { /// instances. For example DataFusion might be configured so that /// /// 1. `s3://my_bucket/lineitem/` mapped to the `/lineitem` path on an -/// AWS S3 object store bound to `my_bucket` +/// AWS S3 object store bound to `my_bucket` /// /// 2. `s3://my_other_bucket/lineitem/` mapped to the (same) -/// `/lineitem` path on a *different* AWS S3 object store bound to -/// `my_other_bucket` +/// `/lineitem` path on a *different* AWS S3 object store bound to +/// `my_other_bucket` /// /// When given a [`ListingTableUrl`], DataFusion tries to find an /// appropriate [`ObjectStore`]. For example @@ -127,21 +127,21 @@ impl std::fmt::Display for ObjectStoreUrl { /// [`ObjectStoreRegistry::get_store`] and one of three things will happen: /// /// - If an [`ObjectStore`] has been registered with [`ObjectStoreRegistry::register_store`] with -/// `s3://my_bucket`, that [`ObjectStore`] will be returned +/// `s3://my_bucket`, that [`ObjectStore`] will be returned /// /// - If an AWS S3 object store can be ad-hoc discovered by the url `s3://my_bucket/lineitem/`, this -/// object store will be registered with key `s3://my_bucket` and returned. +/// object store will be registered with key `s3://my_bucket` and returned. /// /// - Otherwise an error will be returned, indicating that no suitable [`ObjectStore`] could -/// be found +/// be found /// /// This allows for two different use-cases: /// /// 1. Systems where object store buckets are explicitly created using DDL, can register these -/// buckets using [`ObjectStoreRegistry::register_store`] +/// buckets using [`ObjectStoreRegistry::register_store`] /// /// 2. 
Systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`] -/// lazily by providing a custom implementation of [`ObjectStoreRegistry`] +/// lazily by providing a custom implementation of [`ObjectStoreRegistry`] /// /// /// [`ListingTableUrl`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingTableUrl.html diff --git a/datafusion/expr/src/accumulator.rs b/datafusion/expr/src/accumulator.rs index 031348269a38a..f9af7850cb924 100644 --- a/datafusion/expr/src/accumulator.rs +++ b/datafusion/expr/src/accumulator.rs @@ -35,12 +35,12 @@ use std::fmt::Debug; /// * compute the final value from its internal state via [`evaluate`] /// /// * retract an update to its state from given inputs via -/// [`retract_batch`] (when used as a window aggregate [window -/// function]) +/// [`retract_batch`] (when used as a window aggregate [window +/// function]) /// /// * convert its internal state to a vector of aggregate values via -/// [`state`] and combine the state from multiple accumulators' -/// via [`merge_batch`], as part of efficient multi-phase grouping. +/// [`state`] and combine the state from multiple accumulators' +/// via [`merge_batch`], as part of efficient multi-phase grouping. /// /// [`GroupsAccumulator`]: crate::GroupsAccumulator /// [`update_batch`]: Self::update_batch diff --git a/datafusion/expr/src/groups_accumulator.rs b/datafusion/expr/src/groups_accumulator.rs index 2ffbfb266e9ca..0d57c403bbe0b 100644 --- a/datafusion/expr/src/groups_accumulator.rs +++ b/datafusion/expr/src/groups_accumulator.rs @@ -84,13 +84,13 @@ pub trait GroupsAccumulator: Send { /// * `values`: the input arguments to the accumulator /// /// * `group_indices`: To which groups do the rows in `values` - /// belong, group id) + /// belong, group id) /// /// * `opt_filter`: if present, only update aggregate state using - /// `values[i]` if `opt_filter[i]` is true + /// `values[i]` if `opt_filter[i]` is true /// /// * `total_num_groups`: the number of groups (the largest - /// group_index is thus `total_num_groups - 1`). + /// group_index is thus `total_num_groups - 1`). /// /// Note that subsequent calls to update_batch may have larger /// total_num_groups as new groups are seen. diff --git a/datafusion/expr/src/interval_arithmetic.rs b/datafusion/expr/src/interval_arithmetic.rs index d0dd418c78e7e..553cdd8c87097 100644 --- a/datafusion/expr/src/interval_arithmetic.rs +++ b/datafusion/expr/src/interval_arithmetic.rs @@ -989,7 +989,8 @@ fn div_bounds( /// results are converted to an *unbounded endpoint* if: /// - We are calculating an upper bound and we have a positive overflow. /// - We are calculating a lower bound and we have a negative overflow. -/// Otherwise; the function sets the endpoint as: +/// +/// Otherwise, the function sets the endpoint as: /// - The minimum representable number with the given datatype (`dt`) if /// we are calculating an upper bound and we have a negative overflow. /// - The maximum representable number with the given datatype (`dt`) if diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index d4fe233cac06e..54c857a2b7013 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1910,19 +1910,19 @@ pub struct EmptyRelation { /// From the [Postgres Docs]: /// /// 1. Evaluate the non-recursive term. For `UNION` (but not `UNION ALL`), -/// discard duplicate rows. 
Include all remaining rows in the result of the -/// recursive query, and also place them in a temporary working table. -// +/// discard duplicate rows. Include all remaining rows in the result of the +/// recursive query, and also place them in a temporary working table. +/// /// 2. So long as the working table is not empty, repeat these steps: /// /// * Evaluate the recursive term, substituting the current contents of the -/// working table for the recursive self-reference. For `UNION` (but not `UNION -/// ALL`), discard duplicate rows and rows that duplicate any previous result -/// row. Include all remaining rows in the result of the recursive query, and -/// also place them in a temporary intermediate table. +/// working table for the recursive self-reference. For `UNION` (but not `UNION +/// ALL`), discard duplicate rows and rows that duplicate any previous result +/// row. Include all remaining rows in the result of the recursive query, and +/// also place them in a temporary intermediate table. /// /// * Replace the contents of the working table with the contents of the -/// intermediate table, then empty the intermediate table. +/// intermediate table, then empty the intermediate table. /// /// [Postgres Docs]: https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-RECURSIVE #[derive(Clone, PartialEq, Eq, Hash)] @@ -2003,7 +2003,7 @@ impl Projection { /// # Arguments /// /// * `input`: A reference to the input `LogicalPlan` for which the projection schema -/// will be computed. +/// will be computed. /// * `exprs`: A slice of `Expr` expressions representing the projection operation to apply. /// /// # Returns diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index ccf45ff0d0486..a55cb49b1f402 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -74,7 +74,7 @@ impl<'a> SimplifyContext<'a> { impl<'a> SimplifyInfo for SimplifyContext<'a> { /// returns true if this Expr has boolean type fn is_boolean_type(&self, expr: &Expr) -> Result { - for schema in &self.schema { + if let Some(schema) = &self.schema { if let Ok(DataType::Boolean) = expr.get_type(schema) { return Ok(true); } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 8867a478f790a..3f4a99749cf65 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -56,7 +56,7 @@ use crate::{AccumulatorFactoryFunction, ReturnTypeFunction, Signature}; /// 1. For simple use cases, use [`create_udaf`] (examples in [`simple_udaf.rs`]). /// /// 2. For advanced use cases, use [`AggregateUDFImpl`] which provides full API -/// access (examples in [`advanced_udaf.rs`]). +/// access (examples in [`advanced_udaf.rs`]). /// /// # API Note /// This is a separate struct from `AggregateUDFImpl` to maintain backwards @@ -346,9 +346,9 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// # Arguments: /// 1. `name`: the name of the expression (e.g. AVG, SUM, etc) /// 2. `value_type`: Aggregate function output returned by [`Self::return_type`] if defined, otherwise - /// it is equivalent to the data type of the first arguments + /// it is equivalent to the data type of the first arguments /// 3. `ordering_fields`: the fields used to order the input arguments, if any. - /// Empty if no ordering expression is provided. + /// Empty if no ordering expression is provided. 
/// /// # Notes: /// diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 1fbb3cc584b34..487b098ea201b 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -44,7 +44,7 @@ use crate::{ /// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]). /// /// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API -/// access (examples in [`advanced_udf.rs`]). +/// access (examples in [`advanced_udf.rs`]). /// /// # API Note /// diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 5abce013dfb6f..9e6d963ccf7f4 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -40,10 +40,10 @@ use crate::{ /// See the documentation on [`PartitionEvaluator`] for more details /// /// 1. For simple use cases, use [`create_udwf`] (examples in -/// [`simple_udwf.rs`]). +/// [`simple_udwf.rs`]). /// /// 2. For advanced use cases, use [`WindowUDFImpl`] which provides full API -/// access (examples in [`advanced_udwf.rs`]). +/// access (examples in [`advanced_udwf.rs`]). /// /// # API Note /// This is a separate struct from `WindowUDFImpl` to maintain backwards diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 5b2f8982a559e..0e1d917419f8d 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -303,11 +303,11 @@ impl WindowFrame { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum WindowFrameBound { /// 1. UNBOUNDED PRECEDING - /// The frame boundary is the first row in the partition. + /// The frame boundary is the first row in the partition. /// /// 2. `` PRECEDING - /// `` must be a non-negative constant numeric expression. The boundary is a row that - /// is `` "units" prior to the current row. + /// `` must be a non-negative constant numeric expression. The boundary is a row that + /// is `` "units" prior to the current row. Preceding(ScalarValue), /// 3. The current row. /// @@ -317,10 +317,10 @@ pub enum WindowFrameBound { /// boundary. CurrentRow, /// 4. This is the same as "`` PRECEDING" except that the boundary is `` units after the - /// current rather than before the current row. + /// current rather than before the current row. /// /// 5. UNBOUNDED FOLLOWING - /// The frame boundary is the last row in the partition. + /// The frame boundary is the last row in the partition. Following(ScalarValue), } diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 32ca05b8cdd93..1711869666440 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -52,7 +52,7 @@ //! 3. Add a new feature to `Cargo.toml`, with any optional dependencies //! //! 4. Use the `make_package!` macro to expose the module when the -//! feature is enabled. +//! feature is enabled. #[macro_use] pub mod macros; diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index b1c55c843f71d..81be5552666d9 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -76,7 +76,7 @@ //! 3. Add a new feature to `Cargo.toml`, with any optional dependencies //! //! 4. Use the `make_package!` macro to expose the module when the -//! feature is enabled. +//! feature is enabled. //! //! 
[`ScalarUDF`]: datafusion_expr::ScalarUDF use datafusion_common::Result; diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 9856ea271ca54..78be48a189f78 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -35,8 +35,8 @@ use datafusion_expr::{ /// the allowed while list: [Projection, Filter, Window, Aggregate, Join]. /// 2) Check whether the inner plan is in the allowed inner plans list to use correlated(outer) expressions. /// 3) Check and validate unsupported cases to use the correlated(outer) expressions inside the subquery(inner) plans/inner expressions. -/// For example, we do not want to support to use correlated expressions as the Join conditions in the subquery plan when the Join -/// is a Full Out Join +/// For example, we do not want to support to use correlated expressions as the Join conditions in the subquery plan when the Join +/// is a Full Out Join pub fn check_subquery_expr( outer_plan: &LogicalPlan, inner_plan: &LogicalPlan, diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index bbf2091c22175..70ca6f5304ad5 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -599,7 +599,7 @@ impl CommonSubexprEliminate { /// /// 1. The original `input` of no common subexpressions were extracted /// 2. A newly added projection on top of the original input - /// that computes the common subexpressions + /// that computes the common subexpressions fn try_unary_plan( &self, expr: Vec, diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 332d3e9fe54e9..3b1df3510d2a4 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -22,11 +22,11 @@ //! Contains rules for rewriting [`LogicalPlan`]s //! //! 1. [`Analyzer`] applies [`AnalyzerRule`]s to transform `LogicalPlan`s -//! to make the plan valid prior to the rest of the DataFusion optimization -//! process (for example, [`TypeCoercion`]). +//! to make the plan valid prior to the rest of the DataFusion optimization +//! process (for example, [`TypeCoercion`]). //! //! 2. [`Optimizer`] applies [`OptimizerRule`]s to transform `LogicalPlan`s -//! into equivalent, but more efficient plans. +//! into equivalent, but more efficient plans. //! //! [`LogicalPlan`]: datafusion_expr::LogicalPlan //! 
[`TypeCoercion`]: analyzer::type_coercion::TypeCoercion diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index ad9be449d9ab7..ecad3404d5e03 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -557,7 +557,7 @@ fn push_down_join( /// * `predicates` the pushed down filter expression /// /// * `on_filters` filters from the join ON clause that have not already been -/// identified as join predicates +/// identified as join predicates /// fn infer_join_predicates( join: &Join, diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 2ccf93e2d5b32..09fdd7685a9cd 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -170,7 +170,7 @@ impl<'a> TreeNodeRewriter for GuaranteeRewriter<'a> { .filter_map(|expr| { if let Expr::Literal(item) = expr { match interval - .contains(&NullableInterval::from(item.clone())) + .contains(NullableInterval::from(item.clone())) { // If we know for certain the value isn't in the column's interval, // we can skip checking it. diff --git a/datafusion/optimizer/src/simplify_expressions/regex.rs b/datafusion/optimizer/src/simplify_expressions/regex.rs index 9a78298b10a7c..6c99f18ab0f64 100644 --- a/datafusion/optimizer/src/simplify_expressions/regex.rs +++ b/datafusion/optimizer/src/simplify_expressions/regex.rs @@ -216,6 +216,7 @@ fn is_anchored_capture(v: &[Hir]) -> bool { /// Returns the `LIKE` pattern if the `Concat` pattern is partial anchored: /// - `[Look::Start, Literal(_)]` /// - `[Literal(_), Look::End]` +/// /// Full anchored patterns are handled by [`anchored_literal_to_expr`]. fn partial_anchored_literal_to_like(v: &[Hir]) -> Option { if v.len() != 2 { diff --git a/datafusion/physical-expr-common/src/aggregate/utils.rs b/datafusion/physical-expr-common/src/aggregate/utils.rs index bcd0d05be054d..9e380bd820ff4 100644 --- a/datafusion/physical-expr-common/src/aggregate/utils.rs +++ b/datafusion/physical-expr-common/src/aggregate/utils.rs @@ -207,7 +207,7 @@ impl DecimalAverager { /// target_scale and target_precision and reporting overflow. /// /// * sum: The total sum value stored as Decimal128 with sum_scale - /// (passed to `Self::try_new`) + /// (passed to `Self::try_new`) /// * count: total count, stored as a i128/i256 (*NOT* a Decimal128/Decimal256 value) #[inline(always)] pub fn avg(&self, sum: T::Native, count: T::Native) -> Result { diff --git a/datafusion/physical-expr-common/src/binary_map.rs b/datafusion/physical-expr-common/src/binary_map.rs index 23280701013de..a5da05d2a5351 100644 --- a/datafusion/physical-expr-common/src/binary_map.rs +++ b/datafusion/physical-expr-common/src/binary_map.rs @@ -113,13 +113,13 @@ impl ArrowBytesSet { /// This is a specialized HashMap with the following properties: /// /// 1. Optimized for storing and emitting Arrow byte types (e.g. -/// `StringArray` / `BinaryArray`) very efficiently by minimizing copying of -/// the string values themselves, both when inserting and when emitting the -/// final array. +/// `StringArray` / `BinaryArray`) very efficiently by minimizing copying of +/// the string values themselves, both when inserting and when emitting the +/// final array. /// /// /// 2. Retains the insertion order of entries in the final array. The values are -/// in the same order as they were inserted. 
+/// in the same order as they were inserted. /// /// Note this structure can be used as a `HashSet` by specifying the value type /// as `()`, as is done by [`ArrowBytesSet`]. @@ -134,18 +134,18 @@ impl ArrowBytesSet { /// "Foo", NULL, "Bar", "TheQuickBrownFox": /// /// * `hashtable` stores entries for each distinct string that has been -/// inserted. The entries contain the payload as well as information about the -/// value (either an offset or the actual bytes, see `Entry` docs for more -/// details) +/// inserted. The entries contain the payload as well as information about the +/// value (either an offset or the actual bytes, see `Entry` docs for more +/// details) /// /// * `offsets` stores offsets into `buffer` for each distinct string value, -/// following the same convention as the offsets in a `StringArray` or -/// `LargeStringArray`. +/// following the same convention as the offsets in a `StringArray` or +/// `LargeStringArray`. /// /// * `buffer` stores the actual byte data /// /// * `null`: stores the index and payload of the null value, in this case the -/// second value (index 1) +/// second value (index 1) /// /// ```text /// ┌───────────────────────────────────┐ ┌─────┐ ┌────┐ diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index c74fb9c2d1b79..e62606a42e6fb 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -81,7 +81,7 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq { /// # Arguments /// /// * `children` are the intervals for the children (inputs) of this - /// expression. + /// expression. /// /// # Example /// diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 64c22064d4b79..a6e9fba281676 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -1056,22 +1056,22 @@ impl EquivalenceProperties { /// /// Order information can be retrieved as: /// - If it is a leaf node, we directly find the order of the node by looking -/// at the given sort expression and equivalence properties if it is a `Column` -/// leaf, or we mark it as unordered. In the case of a `Literal` leaf, we mark -/// it as singleton so that it can cooperate with all ordered columns. +/// at the given sort expression and equivalence properties if it is a `Column` +/// leaf, or we mark it as unordered. In the case of a `Literal` leaf, we mark +/// it as singleton so that it can cooperate with all ordered columns. /// - If it is an intermediate node, the children states matter. Each `PhysicalExpr` -/// and operator has its own rules on how to propagate the children orderings. -/// However, before we engage in recursion, we check whether this intermediate -/// node directly matches with the sort expression. If there is a match, the -/// sort expression emerges at that node immediately, discarding the recursive -/// result coming from its children. +/// and operator has its own rules on how to propagate the children orderings. +/// However, before we engage in recursion, we check whether this intermediate +/// node directly matches with the sort expression. If there is a match, the +/// sort expression emerges at that node immediately, discarding the recursive +/// result coming from its children. 
/// /// Range information is calculated as: /// - If it is a `Literal` node, we set the range as a point value. If it is a -/// `Column` node, we set the datatype of the range, but cannot give an interval -/// for the range, yet. +/// `Column` node, we set the datatype of the range, but cannot give an interval +/// for the range, yet. /// - If it is an intermediate node, the children states matter. Each `PhysicalExpr` -/// and operator has its own rules on how to propagate the children range. +/// and operator has its own rules on how to propagate the children range. fn update_properties( mut node: ExprPropertiesNode, eq_properties: &EquivalenceProperties, diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 993ff56100630..4385066529e74 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -62,14 +62,14 @@ use std::sync::Arc; /// A guarantee can be one of two forms: /// /// 1. The column must be one the values for the predicate to be `true`. If the -/// column takes on any other value, the predicate can not evaluate to `true`. -/// For example, -/// `(a = 1)`, `(a = 1 OR a = 2)` or `a IN (1, 2, 3)` +/// column takes on any other value, the predicate can not evaluate to `true`. +/// For example, +/// `(a = 1)`, `(a = 1 OR a = 2)` or `a IN (1, 2, 3)` /// /// 2. The column must NOT be one of the values for the predicate to be `true`. -/// If the column can ONLY take one of these values, the predicate can not -/// evaluate to `true`. For example, -/// `(a != 1)`, `(a != 1 AND a != 2)` or `a NOT IN (1, 2, 3)` +/// If the column can ONLY take one of these values, the predicate can not +/// evaluate to `true`. For example, +/// `(a != 1)`, `(a != 1 AND a != 2)` or `a NOT IN (1, 2, 3)` #[derive(Debug, Clone, PartialEq)] pub struct LiteralGuarantee { pub column: Column, @@ -374,6 +374,7 @@ impl<'a> ColOpLit<'a> { /// 1. `col literal` /// 2. `literal col` /// 3. operator is `=` or `!=` + /// /// Returns None otherwise fn try_new(expr: &'a Arc) -> Option { let binary_expr = expr diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 00fc81ebde978..d3f66bdea93d5 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -31,6 +31,9 @@ rust-version = { workspace = true } [lints] workspace = true +[features] +force_hash_collisions = [] + [lib] name = "datafusion_physical_plan" path = "src/lib.rs" diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index 556103e1e2228..1d94d56df1383 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -87,7 +87,7 @@ impl GroupOrdering { /// Called when new groups are added in a batch /// /// * `total_num_groups`: total number of groups (so max - /// group_index is total_num_groups - 1). + /// group_index is total_num_groups - 1). /// /// * `group_values`: group key values for *each row* in the batch /// diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 16b3a4f2febd9..4ed4d9ba2534f 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -180,9 +180,9 @@ impl JoinLeftData { /// Execution proceeds in 2 stages: /// /// 1. 
the **build phase** creates a hash table from the tuples of the build side, -/// and single concatenated batch containing data from all fetched record batches. -/// Resulting hash table stores hashed join-key fields for each row as a key, and -/// indices of corresponding rows in concatenated batch. +/// and single concatenated batch containing data from all fetched record batches. +/// Resulting hash table stores hashed join-key fields for each row as a key, and +/// indices of corresponding rows in concatenated batch. /// /// Hash join uses LIFO data structure as a hash table, and in order to retain /// original build-side input order while obtaining data during probe phase, hash @@ -223,7 +223,7 @@ impl JoinLeftData { /// ``` /// /// 2. the **probe phase** where the tuples of the probe side are streamed -/// through, checking for matches of the join keys in the hash table. +/// through, checking for matches of the join keys in the hash table. /// /// ```text /// ┌────────────────┐ ┌────────────────┐ @@ -1092,7 +1092,7 @@ impl ProcessProbeBatchState { /// 1. Reads the entire left input (build) and constructs a hash table /// /// 2. Streams [RecordBatch]es as they arrive from the right input (probe) and joins -/// them with the contents of the hash table +/// them with the contents of the hash table struct HashJoinStream { /// Input schema schema: Arc, @@ -1583,6 +1583,7 @@ mod tests { use rstest::*; use rstest_reuse::*; + #[cfg(not(feature = "force_hash_collisions"))] fn div_ceil(a: usize, b: usize) -> usize { (a + b - 1) / b } @@ -1930,6 +1931,8 @@ mod tests { Ok(()) } + // FIXME(#TODO) test fails with feature `force_hash_collisions` + #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn join_inner_two(batch_size: usize) -> Result<()> { @@ -1985,6 +1988,8 @@ mod tests { } /// Test where the left has 2 parts, the right with 1 part => 1 part + // FIXME(#TODO) test fails with feature `force_hash_collisions` + #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn join_inner_one_two_parts_left(batch_size: usize) -> Result<()> { @@ -2097,6 +2102,8 @@ mod tests { } /// Test where the left has 1 part, the right has 2 parts => 2 parts + // FIXME(#TODO) test fails with feature `force_hash_collisions` + #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn join_inner_one_two_parts_right(batch_size: usize) -> Result<()> { diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 2299b7ff07f13..1bf2ef2fd5f72 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -94,7 +94,7 @@ const HASHMAP_SHRINK_SCALE_FACTOR: usize = 4; /// - If so record the visited rows. If the matched row results must be produced (INNER, LEFT), output the [RecordBatch]. /// - Try to prune other side (probe) with new [RecordBatch]. /// - If the join type indicates that the unmatched rows results must be produced (LEFT, FULL etc.), -/// output the [RecordBatch] when a pruning happens or at the end of the data. +/// output the [RecordBatch] when a pruning happens or at the end of the data. /// /// /// ``` text @@ -584,7 +584,7 @@ impl Stream for SymmetricHashJoinStream { /// /// * `buffer`: The record batch to be pruned. /// * `build_side_filter_expr`: The filter expression on the build side used -/// to determine the pruning length. 
+/// to determine the pruning length. /// /// # Returns /// diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 13ff63c174055..5e82c6dab8fa1 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -90,14 +90,14 @@ impl ExternalSorterMetrics { /// 1. get a non-empty new batch from input /// /// 2. check with the memory manager there is sufficient space to -/// buffer the batch in memory 2.1 if memory sufficient, buffer -/// batch in memory, go to 1. +/// buffer the batch in memory 2.1 if memory sufficient, buffer +/// batch in memory, go to 1. /// /// 2.2 if no more memory is available, sort all buffered batches and /// spill to file. buffer the next batch in memory, go to 1. /// /// 3. when input is exhausted, merge all in memory batches and spills -/// to get a total order. +/// to get a total order. /// /// # When data fits in available memory /// @@ -321,7 +321,7 @@ impl ExternalSorter { /// 1. An in-memory sort/merge (if the input fit in memory) /// /// 2. A combined streaming merge incorporating both in-memory - /// batches and data from spill files on disk. + /// batches and data from spill files on disk. fn sort(&mut self) -> Result { if self.spilled_before() { let mut streams = vec![]; diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index bdd56f4b5aa41..67c2aaedbebfa 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -341,8 +341,9 @@ fn flatten_struct_cols( /// For each row in a `RecordBatch`, some list/struct columns need to be unnested. /// - For list columns: We will expand the values in each list into multiple rows, -/// taking the longest length among these lists, and shorter lists are padded with NULLs. +/// taking the longest length among these lists, and shorter lists are padded with NULLs. /// - For struct columns: We will expand the struct columns into multiple subfield columns. +/// /// For columns that don't need to be unnested, repeat their values until reaching the longest length. fn build_batch( batch: &RecordBatch, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index ffe558e215839..a462430ca3819 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -579,6 +579,7 @@ pub fn get_best_fitting_window( /// (input ordering is not sufficient to run current window executor). /// - A `Some((bool, InputOrderMode))` value indicates that the window operator /// can run with existing input ordering, so we can remove `SortExec` before it. +/// /// The `bool` field in the return value represents whether we should reverse window /// operator to remove `SortExec` before it. The `InputOrderMode` field represents /// the mode this window operator should work in to accommodate the existing ordering. diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index f53cab5df8482..956f5e17e26f5 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -20,13 +20,13 @@ //! This crate provides: //! //! 1. A SQL parser, [`DFParser`], that translates SQL query text into -//! an abstract syntax tree (AST), [`Statement`]. +//! an abstract syntax tree (AST), [`Statement`]. //! //! 2. A SQL query planner [`SqlToRel`] that creates [`LogicalPlan`]s -//! from [`Statement`]s. +//! from [`Statement`]s. //! //! 3. 
A SQL [`unparser`] that converts [`Expr`]s and [`LogicalPlan`]s -//! into SQL query text. +//! into SQL query text. //! //! [`DFParser`]: parser::DFParser //! [`Statement`]: parser::Statement diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index a743aa72829de..5a774a2397b34 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -1526,10 +1526,10 @@ mod tests { /// that: /// /// 1. parsing `sql` results in the same [`Statement`] as parsing - /// `canonical`. + /// `canonical`. /// /// 2. re-serializing the result of parsing `sql` produces the same - /// `canonical` sql string + /// `canonical` sql string fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { let mut statements = DFParser::parse_sql(sql).unwrap(); assert_eq!(statements.len(), 1); diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index a70e3e9be9303..483b8093a0332 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -268,6 +268,7 @@ pub(crate) fn normalize_ident(id: Ident) -> String { /// Try transform depends on unnest type /// - For list column: unnest(col) with type list -> unnest(col) with type list::item /// - For struct column: unnest(struct(field1, field2)) -> unnest(struct).field1, unnest(struct).field2 +/// /// The transformed exprs will be used in the outer projection /// If along the path from root to bottom, there are multiple unnest expressions, the transformation /// is done only for the bottom expression diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index e70f800bde749..553cdeee908cc 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -251,25 +251,26 @@ SELECT COUNT(*) FROM timestamp_with_tz; ---- 131072 +# FIXME(#TODO) fails with feature `force_hash_collisions` # Perform the query: -query IPT -SELECT - count, - LAG(timestamp, 1) OVER (ORDER BY timestamp), - arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) -FROM timestamp_with_tz -LIMIT 10; ----- -0 NULL Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# query IPT +# SELECT +# count, +# LAG(timestamp, 1) OVER (ORDER BY timestamp), +# arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) +# FROM timestamp_with_tz +# LIMIT 10; +# ---- +# 0 NULL Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) # Test config listing_table_ignore_subdirectory: diff --git 
a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt index 5a6334602c22f..bebec31b90c09 100644 --- a/datafusion/sqllogictest/test_files/sort_merge_join.slt +++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt @@ -238,16 +238,17 @@ SELECT * FROM t1 FULL JOIN t2 ON t1_id = t2_id 44 d 4 44 x 3 NULL NULL NULL 55 w 3 +# FIXME(#TODO) fails with feature `force_hash_collisions` # equijoin_full_and_condition_from_both -query ITIITI rowsort -SELECT * FROM t1 FULL JOIN t2 ON t1_id = t2_id AND t2_int <= t1_int ----- -11 a 1 NULL NULL NULL -22 b 2 22 y 1 -33 c 3 NULL NULL NULL -44 d 4 44 x 3 -NULL NULL NULL 11 z 3 -NULL NULL NULL 55 w 3 +# query ITIITI rowsort +# SELECT * FROM t1 FULL JOIN t2 ON t1_id = t2_id AND t2_int <= t1_int +# ---- +# 11 a 1 NULL NULL NULL +# 22 b 2 22 y 1 +# 33 c 3 NULL NULL NULL +# 44 d 4 44 x 3 +# NULL NULL NULL 11 z 3 +# NULL NULL NULL 55 w 3 statement ok DROP TABLE t1; diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 15c447114819e..eebadb239d564 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -151,6 +151,7 @@ fn split_eq_and_noneq_join_predicate_with_nulls_equality( let mut nulls_equal_nulls = false; for expr in exprs { + #[allow(clippy::collapsible_match)] match expr { Expr::BinaryExpr(binary_expr) => match binary_expr { x @ (BinaryExpr { From d452d51bac7dbac118bc51d11cf7bb33f224f086 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Thu, 25 Jul 2024 13:15:28 -0700 Subject: [PATCH 149/357] Add parser option enable_options_value_normalization (#11330) * draft option enable_options_value_normalization * Add unit tests * Fix ci * Fix bad merge * Update configs.md * Fix ci 2 * Fix doc gen * Fix comments * Fix ut * fix format * fix fmt --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/config.rs | 3 + .../core/src/execution/session_state.rs | 2 + datafusion/sql/src/cte.rs | 2 +- datafusion/sql/src/expr/identifier.rs | 6 +- datafusion/sql/src/planner.rs | 58 ++++++++-- datafusion/sql/src/relation/join.rs | 2 +- datafusion/sql/src/select.rs | 2 +- datafusion/sql/src/statement.rs | 108 ++++++------------ datafusion/sql/src/utils.rs | 26 ++++- datafusion/sql/tests/sql_integration.rs | 70 +++++++++++- .../test_files/information_schema.slt | 2 + docs/source/user-guide/configs.md | 1 + 12 files changed, 191 insertions(+), 91 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1f20bd255027a..c3082546b4971 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -210,6 +210,9 @@ config_namespace! { /// When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) pub enable_ident_normalization: bool, default = true + /// When set to true, SQL parser will normalize options value (convert value to lowercase) + pub enable_options_value_normalization: bool, default = true + /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. 
pub dialect: String, default = "generic".to_string() diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 515888519fce9..cc2b44cf1933b 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -512,6 +512,8 @@ impl SessionState { ParserOptions { parse_float_as_decimal: sql_parser_options.parse_float_as_decimal, enable_ident_normalization: sql_parser_options.enable_ident_normalization, + enable_options_value_normalization: sql_parser_options + .enable_options_value_normalization, support_varchar_with_length: sql_parser_options.support_varchar_with_length, } } diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 3dfe00e3c5e0b..4c380f0b37a31 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -38,7 +38,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Process CTEs from top to bottom for cte in with.cte_tables { // A `WITH` block can't use the same name more than once - let cte_name = self.normalizer.normalize(cte.alias.name.clone()); + let cte_name = self.ident_normalizer.normalize(cte.alias.name.clone()); if planner_context.contains_cte(&cte_name) { return plan_err!( "WITH query name {cte_name:?} specified more than once" diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 9b8356701a40b..049600799f3ce 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -50,7 +50,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // interpret names with '.' as if they were // compound identifiers, but this is not a compound // identifier. (e.g. it is "foo.bar" not foo.bar) - let normalize_ident = self.normalizer.normalize(id); + let normalize_ident = self.ident_normalizer.normalize(id); // Check for qualified field with unqualified name if let Ok((qualifier, _)) = @@ -96,7 +96,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if ids[0].value.starts_with('@') { let var_names: Vec<_> = ids .into_iter() - .map(|id| self.normalizer.normalize(id)) + .map(|id| self.ident_normalizer.normalize(id)) .collect(); let ty = self .context_provider @@ -110,7 +110,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } else { let ids = ids .into_iter() - .map(|id| self.normalizer.normalize(id)) + .map(|id| self.ident_normalizer.normalize(id)) .collect::>(); // Currently not supporting more than one nested level diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 901a2ad38d8cc..bf7c3fe0be4f6 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,10 +24,10 @@ use arrow_schema::*; use datafusion_common::{ field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError, }; -use sqlparser::ast::TimezoneInfo; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; +use sqlparser::ast::{TimezoneInfo, Value}; use datafusion_common::TableReference; use datafusion_common::{ @@ -38,8 +38,7 @@ use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use datafusion_expr::utils::find_column_exprs; use datafusion_expr::{col, Expr}; -use crate::utils::make_decimal_type; - +use crate::utils::{make_decimal_type, value_to_string}; pub use datafusion_expr::planner::ContextProvider; /// SQL parser options @@ -48,6 +47,7 @@ pub struct ParserOptions { pub 
parse_float_as_decimal: bool, pub enable_ident_normalization: bool, pub support_varchar_with_length: bool, + pub enable_options_value_normalization: bool, } impl Default for ParserOptions { @@ -56,6 +56,7 @@ impl Default for ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, + enable_options_value_normalization: true, } } } @@ -86,6 +87,32 @@ impl IdentNormalizer { } } +/// Value Normalizer +#[derive(Debug)] +pub struct ValueNormalizer { + normalize: bool, +} + +impl Default for ValueNormalizer { + fn default() -> Self { + Self { normalize: true } + } +} + +impl ValueNormalizer { + pub fn new(normalize: bool) -> Self { + Self { normalize } + } + + pub fn normalize(&self, value: Value) -> Option { + match (value_to_string(&value), self.normalize) { + (Some(s), true) => Some(s.to_ascii_lowercase()), + (Some(s), false) => Some(s), + (None, _) => None, + } + } +} + /// Struct to store the states used by the Planner. The Planner will leverage the states to resolve /// CTEs, Views, subqueries and PREPARE statements. The states include /// Common Table Expression (CTE) provided with WITH clause and @@ -184,7 +211,8 @@ impl PlannerContext { pub struct SqlToRel<'a, S: ContextProvider> { pub(crate) context_provider: &'a S, pub(crate) options: ParserOptions, - pub(crate) normalizer: IdentNormalizer, + pub(crate) ident_normalizer: IdentNormalizer, + pub(crate) value_normalizer: ValueNormalizer, } impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -195,12 +223,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Create a new query planner pub fn new_with_options(context_provider: &'a S, options: ParserOptions) -> Self { - let normalize = options.enable_ident_normalization; + let ident_normalize = options.enable_ident_normalization; + let options_value_normalize = options.enable_options_value_normalization; SqlToRel { context_provider, options, - normalizer: IdentNormalizer::new(normalize), + ident_normalizer: IdentNormalizer::new(ident_normalize), + value_normalizer: ValueNormalizer::new(options_value_normalize), } } @@ -214,7 +244,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .iter() .any(|x| x.option == ColumnOption::NotNull); fields.push(Field::new( - self.normalizer.normalize(column.name), + self.ident_normalizer.normalize(column.name), data_type, !not_nullable, )); @@ -252,8 +282,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let default_expr = self .sql_to_expr(default_sql_expr.clone(), &empty_schema, planner_context) .map_err(error_desc)?; - column_defaults - .push((self.normalizer.normalize(column.name.clone()), default_expr)); + column_defaults.push(( + self.ident_normalizer.normalize(column.name.clone()), + default_expr, + )); } } Ok(column_defaults) @@ -268,7 +300,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = self.apply_expr_alias(plan, alias.columns)?; LogicalPlanBuilder::from(plan) - .alias(TableReference::bare(self.normalizer.normalize(alias.name)))? + .alias(TableReference::bare( + self.ident_normalizer.normalize(alias.name), + ))? .build() } @@ -289,7 +323,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let fields = plan.schema().fields().clone(); LogicalPlanBuilder::from(plan) .project(fields.iter().zip(idents.into_iter()).map(|(field, ident)| { - col(field.name()).alias(self.normalizer.normalize(ident)) + col(field.name()).alias(self.ident_normalizer.normalize(ident)) }))? 
.build() } @@ -415,7 +449,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { None => Ident::new(format!("c{idx}")) }; Ok(Arc::new(Field::new( - self.normalizer.normalize(field_name), + self.ident_normalizer.normalize(field_name), data_type, true, ))) diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index ee2e35b550f6b..fb1d00b7e48a5 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -115,7 +115,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { JoinConstraint::Using(idents) => { let keys: Vec = idents .into_iter() - .map(|x| Column::from_name(self.normalizer.normalize(x))) + .map(|x| Column::from_name(self.ident_normalizer.normalize(x))) .collect(); LogicalPlanBuilder::from(left) .join_using(right, join_type, keys)? diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 9b105117af157..4de3952dc7ea1 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -576,7 +576,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &[&[plan.schema()]], &plan.using_columns()?, )?; - let name = self.normalizer.normalize(alias); + let name = self.ident_normalizer.normalize(alias); // avoiding adding an alias if the column name is the same. let expr = match &col { Expr::Column(column) if column.name.eq(&name) => col, diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 67107bae0202b..218ff54a1a090 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -66,30 +66,6 @@ fn ident_to_string(ident: &Ident) -> String { normalize_ident(ident.to_owned()) } -fn value_to_string(value: &Value) -> Option { - match value { - Value::SingleQuotedString(s) => Some(s.to_string()), - Value::DollarQuotedString(s) => Some(s.to_string()), - Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()), - Value::DoubleQuotedString(_) - | Value::EscapedStringLiteral(_) - | Value::NationalStringLiteral(_) - | Value::SingleQuotedByteStringLiteral(_) - | Value::DoubleQuotedByteStringLiteral(_) - | Value::TripleSingleQuotedString(_) - | Value::TripleDoubleQuotedString(_) - | Value::TripleSingleQuotedByteStringLiteral(_) - | Value::TripleDoubleQuotedByteStringLiteral(_) - | Value::SingleQuotedRawStringLiteral(_) - | Value::DoubleQuotedRawStringLiteral(_) - | Value::TripleSingleQuotedRawStringLiteral(_) - | Value::TripleDoubleQuotedRawStringLiteral(_) - | Value::HexStringLiteral(_) - | Value::Null - | Value::Placeholder(_) => None, - } -} - fn object_name_to_string(object_name: &ObjectName) -> String { object_name .0 @@ -881,25 +857,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }; - let mut options = HashMap::new(); - for (key, value) in statement.options { - let value_string = match value_to_string(&value) { - None => { - return plan_err!("Unsupported Value in COPY statement {}", value); - } - Some(v) => v, - }; - - if !(&key.contains('.')) { - // If config does not belong to any namespace, assume it is - // a format option and apply the format prefix for backwards - // compatibility. 
- let renamed_key = format!("format.{}", key); - options.insert(renamed_key.to_lowercase(), value_string.to_lowercase()); - } else { - options.insert(key.to_lowercase(), value_string.to_lowercase()); - } - } + let options_map = self.parse_options_map(statement.options, true)?; let maybe_file_type = if let Some(stored_as) = &statement.stored_as { if let Ok(ext_file_type) = self.context_provider.get_file_type(stored_as) { @@ -946,7 +904,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { output_url: statement.target, file_type, partition_by, - options, + options: options_map, })) } @@ -1007,29 +965,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let inline_constraints = calc_inline_constraints_from_columns(&columns); all_constraints.extend(inline_constraints); - let mut options_map = HashMap::::new(); - for (key, value) in options { - if options_map.contains_key(&key) { - return plan_err!("Option {key} is specified multiple times"); - } - - let Some(value_string) = value_to_string(&value) else { - return plan_err!( - "Unsupported Value in CREATE EXTERNAL TABLE statement {}", - value - ); - }; - - if !(&key.contains('.')) { - // If a config does not belong to any namespace, we assume it is - // a format option and apply the format prefix for backwards - // compatibility. - let renamed_key = format!("format.{}", key.to_lowercase()); - options_map.insert(renamed_key, value_string.to_lowercase()); - } else { - options_map.insert(key.to_lowercase(), value_string.to_lowercase()); - } - } + let options_map = self.parse_options_map(options, false)?; let compression = options_map .get("format.compression") @@ -1081,6 +1017,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } + fn parse_options_map( + &self, + options: Vec<(String, Value)>, + allow_duplicates: bool, + ) -> Result> { + let mut options_map = HashMap::new(); + for (key, value) in options { + if !allow_duplicates && options_map.contains_key(&key) { + return plan_err!("Option {key} is specified multiple times"); + } + + let Some(value_string) = self.value_normalizer.normalize(value.clone()) + else { + return plan_err!("Unsupported Value {}", value); + }; + + if !(&key.contains('.')) { + // If config does not belong to any namespace, assume it is + // a format option and apply the format prefix for backwards + // compatibility. + let renamed_key = format!("format.{}", key); + options_map.insert(renamed_key.to_lowercase(), value_string); + } else { + options_map.insert(key.to_lowercase(), value_string); + } + } + + Ok(options_map) + } + /// Generate a plan for EXPLAIN ... 
that will print out a plan /// /// Note this is the sqlparser explain statement, not the @@ -1204,7 +1170,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // parse value string from Expr let value_string = match &value[0] { SQLExpr::Identifier(i) => ident_to_string(i), - SQLExpr::Value(v) => match value_to_string(v) { + SQLExpr::Value(v) => match crate::utils::value_to_string(v) { None => { return plan_err!("Unsupported Value {}", value[0]); } @@ -1365,8 +1331,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { None => { // If the target table has an alias, use it to qualify the column name if let Some(alias) = &table_alias { - Expr::Column(Column::new( - Some(self.normalizer.normalize(alias.name.clone())), + datafusion_expr::Expr::Column(Column::new( + Some(self.ident_normalizer.normalize(alias.name.clone())), field.name(), )) } else { @@ -1421,7 +1387,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut value_indices = vec![None; table_schema.fields().len()]; let fields = columns .into_iter() - .map(|c| self.normalizer.normalize(c)) + .map(|c| self.ident_normalizer.normalize(c)) .enumerate() .map(|(i, c)| { let column_index = table_schema diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 483b8093a0332..a9a782902ac95 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -32,7 +32,7 @@ use datafusion_expr::builder::get_unnested_columns; use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{expr_vec_fmt, Expr, ExprSchemable, LogicalPlan}; -use sqlparser::ast::Ident; +use sqlparser::ast::{Ident, Value}; /// Make a best-effort attempt at resolving all columns in the expression tree pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result { @@ -263,6 +263,30 @@ pub(crate) fn normalize_ident(id: Ident) -> String { } } +pub(crate) fn value_to_string(value: &Value) -> Option { + match value { + Value::SingleQuotedString(s) => Some(s.to_string()), + Value::DollarQuotedString(s) => Some(s.to_string()), + Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()), + Value::DoubleQuotedString(_) + | Value::EscapedStringLiteral(_) + | Value::NationalStringLiteral(_) + | Value::SingleQuotedByteStringLiteral(_) + | Value::DoubleQuotedByteStringLiteral(_) + | Value::TripleSingleQuotedString(_) + | Value::TripleDoubleQuotedString(_) + | Value::TripleSingleQuotedByteStringLiteral(_) + | Value::TripleDoubleQuotedByteStringLiteral(_) + | Value::SingleQuotedRawStringLiteral(_) + | Value::DoubleQuotedRawStringLiteral(_) + | Value::TripleSingleQuotedRawStringLiteral(_) + | Value::TripleDoubleQuotedRawStringLiteral(_) + | Value::HexStringLiteral(_) + | Value::Null + | Value::Placeholder(_) => None, + } +} + /// The context is we want to rewrite unnest() into InnerProjection->Unnest->OuterProjection /// Given an expression which contains unnest expr as one of its children, /// Try transform depends on unnest type diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 511f97c4750e3..40a58827b3886 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -28,9 +28,11 @@ use datafusion_common::{ assert_contains, DataFusionError, ParamValues, Result, ScalarValue, }; use datafusion_expr::{ + dml::CopyTo, logical_plan::{LogicalPlan, Prepare}, test::function_stub::sum_udaf, - ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility, 
+ ColumnarValue, CreateExternalTable, DdlStatement, ScalarUDF, ScalarUDFImpl, + Signature, Volatility, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -87,6 +89,7 @@ fn parse_decimals() { parse_float_as_decimal: true, enable_ident_normalization: false, support_varchar_with_length: false, + enable_options_value_normalization: false, }, ); } @@ -141,6 +144,7 @@ fn parse_ident_normalization() { parse_float_as_decimal: false, enable_ident_normalization, support_varchar_with_length: false, + enable_options_value_normalization: false, }, ); if plan.is_ok() { @@ -151,6 +155,70 @@ fn parse_ident_normalization() { } } +#[test] +fn test_parse_options_value_normalization() { + let test_data = [ + ( + "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", + "CreateExternalTable: Bare { table: \"test\" }", + HashMap::from([("format.location", "LoCaTiOn")]), + false, + ), + ( + "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", + "CreateExternalTable: Bare { table: \"test\" }", + HashMap::from([("format.location", "location")]), + true, + ), + ( + "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", + "CopyTo: format=csv output_url=fake_location options: (format.location LoCaTiOn)\n TableScan: test", + HashMap::from([("format.location", "LoCaTiOn")]), + false, + ), + ( + "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", + "CopyTo: format=csv output_url=fake_location options: (format.location location)\n TableScan: test", + HashMap::from([("format.location", "location")]), + true, + ), + ]; + + for (sql, expected_plan, expected_options, enable_options_value_normalization) in + test_data + { + let plan = logical_plan_with_options( + sql, + ParserOptions { + parse_float_as_decimal: false, + enable_ident_normalization: false, + support_varchar_with_length: false, + enable_options_value_normalization, + }, + ); + if let Ok(plan) = plan { + assert_eq!(expected_plan, format!("{plan:?}")); + + match plan { + LogicalPlan::Ddl(DdlStatement::CreateExternalTable( + CreateExternalTable { options, .. }, + )) + | LogicalPlan::Copy(CopyTo { options, .. 
}) => { + expected_options.iter().for_each(|(k, v)| { + assert_eq!(Some(&v.to_string()), options.get(*k)); + }); + } + _ => panic!( + "Expected Ddl(CreateExternalTable) or Copy(CopyTo) but got {:?}", + plan + ), + } + } else { + assert_eq!(expected_plan, plan.unwrap_err().strip_backtrace()); + } + } +} + #[test] fn select_no_relation() { quick_test( diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index ddacf1cc6a79c..431060a1f6f80 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -239,6 +239,7 @@ datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true +datafusion.sql_parser.enable_options_value_normalization true datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.support_varchar_with_length true @@ -324,6 +325,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) +datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase) datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e4b849cd28bb5..e992361755d36 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -115,5 +115,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | +| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. 
The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | From dcdcc250cc15f98ad89d381e9655dc41be182ab4 Mon Sep 17 00:00:00 2001 From: Ariel Marcus Date: Thu, 25 Jul 2024 16:16:30 -0400 Subject: [PATCH 150/357] Add reference to #comet channel in Arrow Rust Discord server (#11637) * Add reference to #comet channel in Arrow Rust Discord server * list all channels in slack and discord --- .../source/contributor-guide/communication.md | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/communication.md b/docs/source/contributor-guide/communication.md index 40cb28bbde008..43d4122002016 100644 --- a/docs/source/contributor-guide/communication.md +++ b/docs/source/contributor-guide/communication.md @@ -37,10 +37,33 @@ We use the Slack and Discord platforms for informal discussions and coordination meet other contributors and get guidance on where to contribute. It is important to note that any technical designs and decisions are made fully in the open, on GitHub. -Most of us use the `#datafusion` and `#arrow-rust` channels in the [ASF Slack +Most of us use the [ASF Slack workspace](https://s.apache.org/slack-invite) and the [Arrow Rust Discord server](https://discord.gg/Qw5gKqHxUM) for discussions. +There are specific channels for Arrow, DataFusion, and the DataFusion subprojects (Ballista, Comet, Python, etc). + +In Slack we use these channels: + +- #arrow +- #arrow-rust +- #datafusion +- #datafusion-ballista +- #datafusion-comet +- #datafusion-python + +In Discord we use these channels: + +- #ballista +- #comet +- #contrib-federation +- #datafusion +- #datafusion-python +- #dolomite-optimizer +- #general +- #hiring +- #incremental-materialized-views + Unfortunately, due to spammers, the ASF Slack workspace requires an invitation to join. We are happy to invite you -- please ask for an invitation in the Discord server. From fab7e238e8559bdb2e4e0449354f7efeccbc07a7 Mon Sep 17 00:00:00 2001 From: Chris Connelly Date: Thu, 25 Jul 2024 21:16:51 +0100 Subject: [PATCH 151/357] Add `CsvExecBuilder` for creating `CsvExec` (#11633) * feat: add `CsvExecBuilder`, deprecate `CsvExec::new` This adds the `CsvExecBuilder` struct for building a `CsvExec` instance, and deprecates the `CsvExec::new` method which has grown too large. There are some `TODO`s related to the duplication of formatting options and their defaults coming from multiple places. Uses of the deprecated `new` method have not been updated yet. 
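For reference, constructing a `CsvExec` with the new builder looks roughly like this (a minimal sketch mirroring the doc example added to `CsvExec` in this patch; the local object store URL, empty schema, and `file1.csv` path/size are illustrative placeholders, not real inputs):

```rust
use std::sync::Arc;
use arrow::datatypes::Schema;
use datafusion::datasource::{
    listing::PartitionedFile,
    physical_plan::{CsvExec, FileScanConfig},
};
use datafusion_execution::object_store::ObjectStoreUrl;

// Describe which file(s) to scan and with what schema.
let file_scan_config = FileScanConfig::new(
    ObjectStoreUrl::local_filesystem(),
    Arc::new(Schema::empty()),
)
.with_file(PartitionedFile::new("file1.csv", 100 * 1024 * 1024));

// Build the CSV scan, overriding only the options that differ from the defaults.
let exec = CsvExec::builder(file_scan_config)
    .with_has_header(true)          // first row is a header
    .with_newlines_in_values(true)  // quoted values may contain newlines
    .build();
```

Options left unset fall back to the same defaults as `CsvOptions` (no header, `,` delimiter, `"` quote, no escape or comment character, no newlines in values, uncompressed), which the new `test_default_options` test asserts.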
* chore: replace usage of deprecated `CsvExec::new` with `CsvExec::builder` * Add test that CSVExec options are the same * fmt --------- Co-authored-by: Andrew Lamb --- .../core/src/datasource/file_format/csv.rs | 35 +- .../core/src/datasource/physical_plan/csv.rs | 324 ++++++++++++++---- .../enforce_distribution.rs | 89 ++--- .../physical_optimizer/projection_pushdown.rs | 83 +++-- .../replace_with_order_preserving_variants.rs | 33 +- datafusion/core/src/test/mod.rs | 73 ++-- datafusion/proto/src/physical_plan/mod.rs | 55 +-- 7 files changed, 454 insertions(+), 238 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index e1b6daac092db..c55f678aef0fb 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -344,22 +344,25 @@ impl FileFormat for CsvFormat { conf: FileScanConfig, _filters: Option<&Arc>, ) -> Result> { - let exec = CsvExec::new( - conf, - // If format options does not specify whether there is a header, - // we consult configuration options. - self.options - .has_header - .unwrap_or(state.config_options().catalog.has_header), - self.options.delimiter, - self.options.quote, - self.options.escape, - self.options.comment, - self.options - .newlines_in_values - .unwrap_or(state.config_options().catalog.newlines_in_values), - self.options.compression.into(), - ); + // Consult configuration options for default values + let has_header = self + .options + .has_header + .unwrap_or(state.config_options().catalog.has_header); + let newlines_in_values = self + .options + .newlines_in_values + .unwrap_or(state.config_options().catalog.newlines_in_values); + + let exec = CsvExec::builder(conf) + .with_has_header(has_header) + .with_delimeter(self.options.delimiter) + .with_quote(self.options.quote) + .with_escape(self.options.escape) + .with_comment(self.options.comment) + .with_newlines_in_values(newlines_in_values) + .with_file_compression_type(self.options.compression.into()) + .build(); Ok(Arc::new(exec)) } diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index fb0e23c6c1648..be437cfb94448 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -49,7 +49,27 @@ use object_store::{GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; use tokio::task::JoinSet; -/// Execution plan for scanning a CSV file +/// Execution plan for scanning a CSV file. 
+/// +/// # Example: create a `CsvExec` +/// ``` +/// # use std::sync::Arc; +/// # use arrow::datatypes::Schema; +/// # use datafusion::datasource::{ +/// # physical_plan::{CsvExec, FileScanConfig}, +/// # listing::PartitionedFile, +/// # }; +/// # use datafusion_execution::object_store::ObjectStoreUrl; +/// # let object_store_url = ObjectStoreUrl::local_filesystem(); +/// # let file_schema = Arc::new(Schema::empty()); +/// // Create a CsvExec for reading the first 100MB of `file1.csv` +/// let file_scan_config = FileScanConfig::new(object_store_url, file_schema) +/// .with_file(PartitionedFile::new("file1.csv", 100*1024*1024)); +/// let exec = CsvExec::builder(file_scan_config) +/// .with_has_header(true) // The file has a header row +/// .with_newlines_in_values(true) // The file contains newlines in values +/// .build(); +/// ``` #[derive(Debug, Clone)] pub struct CsvExec { base_config: FileScanConfig, @@ -67,27 +87,124 @@ pub struct CsvExec { cache: PlanProperties, } -impl CsvExec { - /// Create a new CSV reader execution plan provided base and specific configurations - #[allow(clippy::too_many_arguments)] - pub fn new( - base_config: FileScanConfig, - has_header: bool, - delimiter: u8, - quote: u8, - escape: Option, - comment: Option, - newlines_in_values: bool, +/// Builder for [`CsvExec`]. +/// +/// See example on [`CsvExec`]. +#[derive(Debug, Clone)] +pub struct CsvExecBuilder { + file_scan_config: FileScanConfig, + file_compression_type: FileCompressionType, + // TODO: it seems like these format options could be reused across all the various CSV config + has_header: bool, + delimiter: u8, + quote: u8, + escape: Option, + comment: Option, + newlines_in_values: bool, +} + +impl CsvExecBuilder { + /// Create a new builder to read the provided file scan configuration. + pub fn new(file_scan_config: FileScanConfig) -> Self { + Self { + file_scan_config, + // TODO: these defaults are duplicated from `CsvOptions` - should they be computed? + has_header: false, + delimiter: b',', + quote: b'"', + escape: None, + comment: None, + newlines_in_values: false, + file_compression_type: FileCompressionType::UNCOMPRESSED, + } + } + + /// Set whether the first row defines the column names. + /// + /// The default value is `false`. + pub fn with_has_header(mut self, has_header: bool) -> Self { + self.has_header = has_header; + self + } + + /// Set the column delimeter. + /// + /// The default is `,`. + pub fn with_delimeter(mut self, delimiter: u8) -> Self { + self.delimiter = delimiter; + self + } + + /// Set the quote character. + /// + /// The default is `"`. + pub fn with_quote(mut self, quote: u8) -> Self { + self.quote = quote; + self + } + + /// Set the escape character. + /// + /// The default is `None` (i.e. quotes cannot be escaped). + pub fn with_escape(mut self, escape: Option) -> Self { + self.escape = escape; + self + } + + /// Set the comment character. + /// + /// The default is `None` (i.e. comments are not supported). + pub fn with_comment(mut self, comment: Option) -> Self { + self.comment = comment; + self + } + + /// Set whether newlines in (quoted) values are supported. + /// + /// Parsing newlines in quoted values may be affected by execution behaviour such as + /// parallel file scanning. Setting this to `true` ensures that newlines in values are + /// parsed successfully, which may reduce performance. + /// + /// The default value is `false`. 
+ pub fn with_newlines_in_values(mut self, newlines_in_values: bool) -> Self { + self.newlines_in_values = newlines_in_values; + self + } + + /// Set the file compression type. + /// + /// The default is [`FileCompressionType::UNCOMPRESSED`]. + pub fn with_file_compression_type( + mut self, file_compression_type: FileCompressionType, ) -> Self { + self.file_compression_type = file_compression_type; + self + } + + /// Build a [`CsvExec`]. + #[must_use] + pub fn build(self) -> CsvExec { + let Self { + file_scan_config: base_config, + file_compression_type, + has_header, + delimiter, + quote, + escape, + comment, + newlines_in_values, + } = self; + let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::compute_properties( + let cache = CsvExec::compute_properties( projected_schema, &projected_output_ordering, &base_config, ); - Self { + + CsvExec { base_config, projected_statistics, has_header, @@ -101,6 +218,39 @@ impl CsvExec { comment, } } +} + +impl CsvExec { + /// Create a new CSV reader execution plan provided base and specific configurations + #[deprecated(since = "41.0.0", note = "use `CsvExec::builder` or `CsvExecBuilder`")] + #[allow(clippy::too_many_arguments)] + pub fn new( + base_config: FileScanConfig, + has_header: bool, + delimiter: u8, + quote: u8, + escape: Option, + comment: Option, + newlines_in_values: bool, + file_compression_type: FileCompressionType, + ) -> Self { + CsvExecBuilder::new(base_config) + .with_has_header(has_header) + .with_delimeter(delimiter) + .with_quote(quote) + .with_escape(escape) + .with_comment(comment) + .with_newlines_in_values(newlines_in_values) + .with_file_compression_type(file_compression_type) + .build() + } + + /// Return a [`CsvExecBuilder`]. + /// + /// See example on [`CsvExec`] and [`CsvExecBuilder`] for specifying CSV table options. 
+ pub fn builder(file_scan_config: FileScanConfig) -> CsvExecBuilder { + CsvExecBuilder::new(file_scan_config) + } /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { @@ -557,6 +707,8 @@ mod tests { use arrow::datatypes::*; use datafusion_common::test_util::arrow_test_data; + use datafusion_common::config::CsvOptions; + use datafusion_execution::object_store::ObjectStoreUrl; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; use rstest::*; @@ -597,16 +749,15 @@ mod tests { let mut config = partitioned_csv_config(file_schema, file_groups); config.projection = Some(vec![0, 2, 4]); - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type) + .build(); assert_eq!(13, csv.base_config.file_schema.fields().len()); assert_eq!(3, csv.schema().fields().len()); @@ -667,16 +818,15 @@ mod tests { let mut config = partitioned_csv_config(file_schema, file_groups); config.projection = Some(vec![4, 0, 2]); - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); assert_eq!(13, csv.base_config.file_schema.fields().len()); assert_eq!(3, csv.schema().fields().len()); @@ -737,16 +887,15 @@ mod tests { let mut config = partitioned_csv_config(file_schema, file_groups); config.limit = Some(5); - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); assert_eq!(13, csv.base_config.file_schema.fields().len()); assert_eq!(13, csv.schema().fields().len()); @@ -804,16 +953,15 @@ mod tests { let mut config = partitioned_csv_config(file_schema, file_groups); config.limit = Some(5); - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); assert_eq!(14, csv.base_config.file_schema.fields().len()); assert_eq!(14, csv.schema().fields().len()); @@ -870,16 +1018,15 @@ mod tests { // we don't have `/date=xx/` in the path but that is ok because // partitions are resolved during scan anyway - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); assert_eq!(13, 
csv.base_config.file_schema.fields().len()); assert_eq!(2, csv.schema().fields().len()); @@ -966,16 +1113,15 @@ mod tests { .unwrap(); let config = partitioned_csv_config(file_schema, file_groups); - let csv = CsvExec::new( - config, - true, - b',', - b'"', - None, - None, - false, - file_compression_type.to_owned(), - ); + let csv = CsvExec::builder(config) + .with_has_header(true) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); let it = csv.execute(0, task_ctx).unwrap(); let batches: Vec<_> = it.try_collect().await.unwrap(); @@ -1183,4 +1329,34 @@ mod tests { Arc::new(schema) } + + /// Ensure that the default options are set correctly + #[test] + fn test_default_options() { + let file_scan_config = + FileScanConfig::new(ObjectStoreUrl::local_filesystem(), aggr_test_schema()) + .with_file(PartitionedFile::new("foo", 34)); + + let CsvExecBuilder { + file_scan_config: _, + file_compression_type: _, + has_header, + delimiter, + quote, + escape, + comment, + newlines_in_values, + } = CsvExecBuilder::new(file_scan_config); + + let default_options = CsvOptions::default(); + assert_eq!(has_header, default_options.has_header.unwrap_or(false)); + assert_eq!(delimiter, default_options.delimiter); + assert_eq!(quote, default_options.quote); + assert_eq!(escape, default_options.escape); + assert_eq!(comment, default_options.comment); + assert_eq!( + newlines_in_values, + default_options.newlines_in_values.unwrap_or(false) + ); + } } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index acca2ed8d997c..1f076e448e600 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1464,18 +1464,21 @@ pub(crate) mod tests { } fn csv_exec_with_sort(output_ordering: Vec>) -> Arc { - Arc::new(CsvExec::new( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_output_ordering(output_ordering), - false, - b',', - b'"', - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + Arc::new( + CsvExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering), + ) + .with_has_header(false) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } fn csv_exec_multiple() -> Arc { @@ -1486,21 +1489,24 @@ pub(crate) mod tests { fn csv_exec_multiple_sorted( output_ordering: Vec>, ) -> Arc { - Arc::new(CsvExec::new( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) - .with_file_groups(vec![ - vec![PartitionedFile::new("x".to_string(), 100)], - vec![PartitionedFile::new("y".to_string(), 100)], - ]) - .with_output_ordering(output_ordering), - false, - b',', - b'"', - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + Arc::new( + CsvExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) + .with_file_groups(vec![ + vec![PartitionedFile::new("x".to_string(), 100)], + vec![PartitionedFile::new("y".to_string(), 100)], + ]) + .with_output_ordering(output_ordering), + ) + 
.with_has_header(false) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } fn projection_exec_with_alias( @@ -3762,20 +3768,23 @@ pub(crate) mod tests { }; let plan = aggregate_exec_with_alias( - Arc::new(CsvExec::new( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema(), + ) + .with_file(PartitionedFile::new("x".to_string(), 100)), ) - .with_file(PartitionedFile::new("x".to_string(), 100)), - false, - b',', - b'"', - None, - None, - false, - compression_type, - )), + .with_has_header(false) + .with_delimeter(b',') + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(compression_type) + .build(), + ), vec![("a".to_string(), "a".to_string())], ); assert_optimized!(expected, plan, true, false, 2, true, 10, false); diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index d0d0c985b8b64..9c545c17da3cc 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -179,16 +179,17 @@ fn try_swapping_with_csv( ); file_scan.projection = Some(new_projections); - Arc::new(CsvExec::new( - file_scan, - csv.has_header(), - csv.delimiter(), - csv.quote(), - csv.escape(), - csv.comment(), - csv.newlines_in_values(), - csv.file_compression_type, - )) as _ + Arc::new( + CsvExec::builder(file_scan) + .with_has_header(csv.has_header()) + .with_delimeter(csv.delimiter()) + .with_quote(csv.quote()) + .with_escape(csv.escape()) + .with_comment(csv.comment()) + .with_newlines_in_values(csv.newlines_in_values()) + .with_file_compression_type(csv.file_compression_type) + .build(), + ) as _ }) } @@ -1689,21 +1690,24 @@ mod tests { Field::new("d", DataType::Int32, true), Field::new("e", DataType::Int32, true), ])); - Arc::new(CsvExec::new( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![0, 1, 2, 3, 4])), ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![0, 1, 2, 3, 4])), - false, - 0, - 0, - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + .with_has_header(false) + .with_delimeter(0) + .with_quote(0) + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } fn create_projecting_csv_exec() -> Arc { @@ -1713,21 +1717,24 @@ mod tests { Field::new("c", DataType::Int32, true), Field::new("d", DataType::Int32, true), ])); - Arc::new(CsvExec::new( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![3, 2, 1])), ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![3, 2, 1])), - false, - 0, - 0, - None, - None, - false, - 
FileCompressionType::UNCOMPRESSED, - )) + .with_has_header(false) + .with_delimeter(0) + .with_quote(0) + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } fn create_projecting_memory_exec() -> Arc { diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index 6565e3e7d0d23..a989be987d3db 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -1490,21 +1490,24 @@ mod tests { let sort_exprs = sort_exprs.into_iter().collect(); let projection: Vec = vec![0, 2, 3]; - Arc::new(CsvExec::new( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) + .with_file(PartitionedFile::new("file_path".to_string(), 100)) + .with_projection(Some(projection)) + .with_output_ordering(vec![sort_exprs]), ) - .with_file(PartitionedFile::new("file_path".to_string(), 100)) - .with_projection(Some(projection)) - .with_output_ordering(vec![sort_exprs]), - true, - 0, - b'"', - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + .with_has_header(true) + .with_delimeter(0) + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 5cb1b6ea7017b..39a126a06bb60 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -92,16 +92,17 @@ pub fn scan_partitioned_csv(partitions: usize, work_dir: &Path) -> Result>`] for scanning `partitions` of `filename` @@ -275,18 +276,24 @@ pub fn csv_exec_sorted( ) -> Arc { let sort_exprs = sort_exprs.into_iter().collect(); - Arc::new(CsvExec::new( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema.clone()) + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_output_ordering(vec![sort_exprs]), - false, - 0, - 0, - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + ) + .with_has_header(false) + .with_delimeter(0) + .with_quote(0) + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) } // construct a stream partition for test purposes @@ -332,18 +339,24 @@ pub fn csv_exec_ordered( ) -> Arc { let sort_exprs = sort_exprs.into_iter().collect(); - Arc::new(CsvExec::new( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema.clone()) + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) .with_file(PartitionedFile::new("file_path".to_string(), 100)) .with_output_ordering(vec![sort_exprs]), - true, - 0, - b'"', - None, - None, - false, - FileCompressionType::UNCOMPRESSED, - )) + ) + .with_has_header(true) + .with_delimeter(0) + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), 
+ ) } /// A mock execution plan that simply returns the provided statistics diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 5c4d41f0eca61..1f433ff01d128 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -187,34 +187,39 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { )), } } - PhysicalPlanType::CsvScan(scan) => Ok(Arc::new(CsvExec::new( - parse_protobuf_file_scan_config( + PhysicalPlanType::CsvScan(scan) => Ok(Arc::new( + CsvExec::builder(parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), registry, extension_codec, - )?, - scan.has_header, - str_to_byte(&scan.delimiter, "delimiter")?, - str_to_byte(&scan.quote, "quote")?, - if let Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape( - escape, - )) = &scan.optional_escape - { - Some(str_to_byte(escape, "escape")?) - } else { - None - }, - if let Some(protobuf::csv_scan_exec_node::OptionalComment::Comment( - comment, - )) = &scan.optional_comment - { - Some(str_to_byte(comment, "comment")?) - } else { - None - }, - scan.newlines_in_values, - FileCompressionType::UNCOMPRESSED, - ))), + )?) + .with_has_header(scan.has_header) + .with_delimeter(str_to_byte(&scan.delimiter, "delimiter")?) + .with_quote(str_to_byte(&scan.quote, "quote")?) + .with_escape( + if let Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape( + escape, + )) = &scan.optional_escape + { + Some(str_to_byte(escape, "escape")?) + } else { + None + }, + ) + .with_comment( + if let Some(protobuf::csv_scan_exec_node::OptionalComment::Comment( + comment, + )) = &scan.optional_comment + { + Some(str_to_byte(comment, "comment")?) + } else { + None + }, + ) + .with_newlines_in_values(scan.newlines_in_values) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + )), #[cfg(feature = "parquet")] PhysicalPlanType::ParquetScan(scan) => { let base_config = parse_protobuf_file_scan_config( From c22e13fa1062578a82bd33c1bce40c01dbeae760 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:29:09 -0400 Subject: [PATCH 152/357] chore(deps): update sqlparser requirement from 0.48 to 0.49 (#11630) * chore(deps): update sqlparser requirement from 0.48 to 0.49 Updates the requirements on [sqlparser](https://github.com/sqlparser-rs/sqlparser-rs) to permit the latest version. - [Changelog](https://github.com/sqlparser-rs/sqlparser-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/sqlparser-rs/sqlparser-rs/compare/v0.48.0...v0.48.0) --- updated-dependencies: - dependency-name: sqlparser dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * Update for API changes * update cargo.lock * Fix generation * Remove take * fix depcheck --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Lamb Co-authored-by: jonahgao --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 122 +++++++++++++++------------- datafusion/sql/src/expr/order_by.rs | 9 +- datafusion/sql/src/parser.rs | 6 ++ datafusion/sql/src/query.rs | 24 ++++-- datafusion/sql/src/relation/mod.rs | 5 ++ datafusion/sql/src/statement.rs | 5 +- datafusion/sql/src/unparser/ast.rs | 12 ++- datafusion/sql/src/unparser/expr.rs | 1 + datafusion/sql/src/unparser/plan.rs | 1 + dev/depcheck/Cargo.toml | 2 +- dev/depcheck/src/main.rs | 6 +- 12 files changed, 125 insertions(+), 70 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9e7971bdc1e8d..c2904da3c106e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,7 +125,7 @@ rand = "0.8" regex = "1.8" rstest = "0.21.0" serde_json = "1" -sqlparser = { version = "0.48", features = ["visitor"] } +sqlparser = { version = "0.49", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a4e87f99b5c37..3dc1db2c90366 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -363,9 +363,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" +checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" dependencies = [ "bzip2", "flate2", @@ -387,7 +387,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1104,7 +1104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1162,7 +1162,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.2.6", - "itertools", + "itertools 0.12.1", "log", "num-traits", "num_cpus", @@ -1288,7 +1288,7 @@ dependencies = [ "datafusion-expr", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.12.1", "log", "md-5", "rand", @@ -1328,7 +1328,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools", + "itertools 0.12.1", "log", "paste", "rand", @@ -1346,7 +1346,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown 0.14.5", "indexmap 2.2.6", - "itertools", + "itertools 0.12.1", "log", "paste", "regex-syntax", @@ -1373,7 +1373,7 @@ dependencies = [ "hashbrown 0.14.5", "hex", "indexmap 2.2.6", - "itertools", + "itertools 0.12.1", "log", "paste", "petgraph", @@ -1425,7 +1425,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.2.6", - "itertools", + "itertools 0.12.1", "log", "once_cell", "parking_lot", @@ -1705,7 +1705,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2027,7 +2027,7 @@ dependencies = [ "http 1.1.0", "hyper 1.4.1", "hyper-util", - "rustls 0.23.11", + "rustls 0.23.12", "rustls-native-certs 0.7.1", "rustls-pki-types", "tokio", @@ 
-2141,6 +2141,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -2149,9 +2158,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -2375,13 +2384,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" dependencies = [ + "hermit-abi 0.3.9", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2502,18 +2512,18 @@ dependencies = [ [[package]] name = "object" -version = "0.36.1" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" +checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -2522,7 +2532,7 @@ dependencies = [ "futures", "humantime", "hyper 1.4.1", - "itertools", + "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", @@ -2718,7 +2728,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2822,9 +2832,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quick-xml" -version = "0.31.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" dependencies = [ "memchr", "serde", @@ -2841,7 +2851,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.11", + "rustls 0.23.12", "thiserror", "tokio", "tracing", @@ -2857,7 +2867,7 @@ dependencies = [ "rand", "ring 0.17.8", "rustc-hash", - "rustls 0.23.11", + "rustls 0.23.12", "slab", "thiserror", "tinyvec", @@ -2866,14 +2876,13 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.2" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" dependencies = [ "libc", "once_cell", "socket2", - "tracing", "windows-sys 0.52.0", ] @@ -3006,7 +3015,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 
0.23.11", + "rustls 0.23.12", "rustls-native-certs 0.7.1", "rustls-pemfile 2.1.2", "rustls-pki-types", @@ -3136,9 +3145,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ "once_cell", "ring 0.17.8", @@ -3200,9 +3209,9 @@ checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" [[package]] name = "rustls-webpki" -version = "0.102.5" +version = "0.102.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" dependencies = [ "ring 0.17.8", "rustls-pki-types", @@ -3330,7 +3339,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3449,9 +3458,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.48.0" +version = "0.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "749780d15ad1ee15fd74f5f84b0665560b6abb913de744c2b69155770f9601da" +checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" dependencies = [ "log", "sqlparser_derive", @@ -3465,7 +3474,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3511,7 +3520,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3524,7 +3533,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3546,9 +3555,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.71" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -3611,7 +3620,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3681,32 +3690,31 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 
2.0.72", ] [[package]] @@ -3726,7 +3734,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.11", + "rustls 0.23.12", "rustls-pki-types", "tokio", ] @@ -3803,7 +3811,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3848,7 +3856,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4002,7 +4010,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -4036,7 +4044,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4301,7 +4309,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 6010da6fd325a..7fb32f714cfa6 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -16,7 +16,9 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{plan_datafusion_err, plan_err, Column, DFSchema, Result}; +use datafusion_common::{ + not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, Result, +}; use datafusion_expr::expr::Sort; use datafusion_expr::Expr; use sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value}; @@ -63,8 +65,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { asc, expr, nulls_first, + with_fill, } = e; + if let Some(with_fill) = with_fill { + return not_impl_err!("ORDER BY WITH FILL is not supported: {with_fill}"); + } + let expr = match expr { SQLExpr::Value(Value::Number(v, _)) if literal_to_column => { let field_index = v diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 5a774a2397b34..40dd368f9e80b 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -625,6 +625,7 @@ impl<'a> DFParser<'a> { expr, asc, nulls_first, + with_fill: None, }) } @@ -1206,6 +1207,7 @@ mod tests { }), asc, nulls_first, + with_fill: None, }]], if_not_exists: false, unbounded: false, @@ -1235,6 +1237,7 @@ mod tests { }), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Identifier(Ident { @@ -1243,6 +1246,7 @@ mod tests { }), asc: Some(false), nulls_first: Some(true), + with_fill: None, }, ]], if_not_exists: false, @@ -1278,6 +1282,7 @@ mod tests { }, asc: Some(true), nulls_first: None, + with_fill: None, }]], if_not_exists: false, unbounded: false, @@ -1321,6 +1326,7 @@ mod tests { }, asc: Some(true), nulls_first: None, + with_fill: None, }]], if_not_exists: true, unbounded: true, diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 00560b5c93089..ba2b41bb6ecff 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -19,13 +19,14 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{plan_err, Constraints, Result, ScalarValue}; +use datafusion_common::{not_impl_err, plan_err, Constraints, Result, ScalarValue}; 
use datafusion_expr::{ CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder, Operator, }; use sqlparser::ast::{ - Expr as SQLExpr, Offset as SQLOffset, Query, SelectInto, SetExpr, Value, + Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByExpr, Query, SelectInto, + SetExpr, Value, }; impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -50,16 +51,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let select_into = select.into.take(); // Order-by expressions may refer to columns in the `FROM` clause, // so we need to process `SELECT` and `ORDER BY` together. - let plan = - self.select_to_plan(*select, query.order_by, planner_context)?; + let oby_exprs = to_order_by_exprs(query.order_by)?; + let plan = self.select_to_plan(*select, oby_exprs, planner_context)?; let plan = self.limit(plan, query.offset, query.limit)?; // Process the `SELECT INTO` after `LIMIT`. self.select_into(plan, select_into) } other => { let plan = self.set_expr_to_plan(other, planner_context)?; + let oby_exprs = to_order_by_exprs(query.order_by)?; let order_by_rex = self.order_by_to_sort_expr( - query.order_by, + oby_exprs, plan.schema(), planner_context, true, @@ -198,3 +200,15 @@ fn convert_usize_with_check(n: i64, arg_name: &str) -> Result { Ok(n as usize) } } + +/// Returns the order by expressions from the query. +fn to_order_by_exprs(order_by: Option) -> Result> { + let Some(OrderBy { exprs, interpolate }) = order_by else { + // if no order by, return an empty array + return Ok(vec![]); + }; + if let Some(_interpolate) = interpolate { + return not_impl_err!("ORDER BY INTERPOLATE is not supported"); + } + Ok(exprs) +} diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index b812dae5ae3e3..5d7b3d5918d3f 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -101,7 +101,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { array_exprs, with_offset: false, with_offset_alias: None, + with_ordinality, } => { + if with_ordinality { + return not_impl_err!("UNNEST with ordinality is not supported yet"); + } + // Unnest table factor has empty input let schema = DFSchema::empty(); let input = LogicalPlanBuilder::empty(true).build()?; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 218ff54a1a090..3737e1adf8f3c 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -147,7 +147,10 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec {} + | ast::ColumnOption::OnUpdate(_) + | ast::ColumnOption::Materialized(_) + | ast::ColumnOption::Ephemeral(_) + | ast::ColumnOption::Alias(_) => {} } } } diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 02eb44dbb657d..c10db9831457b 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -80,13 +80,22 @@ impl QueryBuilder { self } pub fn build(&self) -> Result { + let order_by = if self.order_by.is_empty() { + None + } else { + Some(ast::OrderBy { + exprs: self.order_by.clone(), + interpolate: None, + }) + }; + Ok(ast::Query { with: self.with.clone(), body: match self.body { Some(ref value) => value.clone(), None => return Err(Into::into(UninitializedFieldError::from("body"))), }, - order_by: self.order_by.clone(), + order_by, limit: self.limit.clone(), limit_by: self.limit_by.clone(), offset: self.offset.clone(), @@ -423,6 +432,7 @@ impl TableRelationBuilder { with_hints: self.with_hints.clone(), version: self.version.clone(), 
partitions: self.partitions.clone(), + with_ordinality: false, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 3f7a85da276be..e144dfd649d20 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -543,6 +543,7 @@ impl Unparser<'_> { expr: sql_parser_expr, asc: Some(*asc), nulls_first, + with_fill: None, })) } _ => { diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 59660f4f0404f..9519b520ce5ee 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -558,6 +558,7 @@ impl Unparser<'_> { asc: Some(sort_expr.asc), expr: col, nulls_first, + with_fill: None, }) } _ => plan_err!("Expecting Sort expr"), diff --git a/dev/depcheck/Cargo.toml b/dev/depcheck/Cargo.toml index cb4e77eabb223..23cefaec43be4 100644 --- a/dev/depcheck/Cargo.toml +++ b/dev/depcheck/Cargo.toml @@ -22,4 +22,4 @@ name = "depcheck" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -cargo = "0.78.1" +cargo = "0.81.0" diff --git a/dev/depcheck/src/main.rs b/dev/depcheck/src/main.rs index 1599fdd4188d3..80feefcd1b1c5 100644 --- a/dev/depcheck/src/main.rs +++ b/dev/depcheck/src/main.rs @@ -23,7 +23,7 @@ use std::collections::{HashMap, HashSet}; use std::env; use std::path::Path; -use cargo::util::config::Config; +use cargo::util::context::GlobalContext; /// Verifies that there are no circular dependencies between DataFusion crates /// (which prevents publishing on crates.io) by parsing the Cargo.toml files and @@ -31,7 +31,7 @@ use cargo::util::config::Config; /// /// See https://github.com/apache/datafusion/issues/9278 for more details fn main() -> CargoResult<()> { - let config = Config::default()?; + let gctx = GlobalContext::default()?; // This is the path for the depcheck binary let path = env::var("CARGO_MANIFEST_DIR").unwrap(); let root_cargo_toml = Path::new(&path) @@ -47,7 +47,7 @@ fn main() -> CargoResult<()> { "Checking for circular dependencies in {}", root_cargo_toml.display() ); - let workspace = cargo::core::Workspace::new(&root_cargo_toml, &config)?; + let workspace = cargo::core::Workspace::new(&root_cargo_toml, &gctx)?; let (_, resolve) = cargo::ops::resolve_ws(&workspace)?; let mut package_deps = HashMap::new(); From 2eaf1ea8ac4592bff88714ec9e1a0b4a0e41e610 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 26 Jul 2024 16:58:17 +0200 Subject: [PATCH 153/357] Extract catalog API to separate crate (#11516) This moves `CatalogProvider`, `TableProvider`, `SchemaProvider` to a new `datafusion-catalog` crate. The circular dependency between core `SessionState` and implementations is broken up by introducing `CatalogSession` dyn trait. Implementations of `TableProvider` that reside under core current have access to `CatalogSession` by downcasting. This is supposed to be an intermediate step. 
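
As a rough sketch of the downcast described above (the trait lands as `Session` in
`datafusion::catalog` in the diff below; `SessionState` is assumed to remain reachable via
`datafusion::execution::context`, and the helper name here is illustrative only), a
`TableProvider::scan` implementation that still needs core-only APIs might do:

```rust
use datafusion::catalog::Session;
use datafusion::common::{DataFusionError, Result};
use datafusion::execution::context::SessionState;

// Recover the concrete `SessionState` from the `&dyn Session` passed to
// `TableProvider::scan`. This mirrors the doc example added in the new
// crate's `session.rs` and may stop working in future versions.
fn session_state_from_session(session: &dyn Session) -> Result<&SessionState> {
    session
        .as_any()
        .downcast_ref::<SessionState>()
        .ok_or_else(|| {
            DataFusionError::Execution(
                "Failed to downcast Session to SessionState".to_string(),
            )
        })
}
```
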
--- Cargo.toml | 2 + datafusion-cli/Cargo.lock | 13 + datafusion-cli/src/catalog.rs | 5 +- datafusion-cli/src/functions.rs | 4 +- .../examples/advanced_parquet_index.rs | 6 +- datafusion-examples/examples/catalog.rs | 5 +- .../examples/custom_datasource.rs | 5 +- datafusion-examples/examples/parquet_index.rs | 4 +- datafusion-examples/examples/simple_udtf.rs | 6 +- datafusion/catalog/Cargo.toml | 38 +++ datafusion/catalog/src/catalog.rs | 173 +++++++++++ datafusion/catalog/src/lib.rs | 26 ++ .../src/catalog => catalog/src}/schema.rs | 7 +- datafusion/catalog/src/session.rs | 138 +++++++++ datafusion/catalog/src/table.rs | 292 ++++++++++++++++++ datafusion/core/Cargo.toml | 1 + .../information_schema.rs | 4 +- .../listing_schema.rs | 4 +- .../src/{catalog => catalog_common}/memory.rs | 9 +- .../src/{catalog => catalog_common}/mod.rs | 171 +--------- datafusion/core/src/dataframe/mod.rs | 3 +- .../core/src/datasource/cte_worktable.rs | 4 +- datafusion/core/src/datasource/empty.rs | 4 +- .../core/src/datasource/file_format/mod.rs | 2 +- .../core/src/datasource/listing/table.rs | 27 +- .../src/datasource/listing_table_factory.rs | 18 +- datafusion/core/src/datasource/memory.rs | 5 +- datafusion/core/src/datasource/mod.rs | 2 +- datafusion/core/src/datasource/provider.rs | 276 +---------------- datafusion/core/src/datasource/stream.rs | 12 +- datafusion/core/src/datasource/streaming.rs | 11 +- datafusion/core/src/datasource/view.rs | 4 +- datafusion/core/src/execution/context/mod.rs | 14 +- .../core/src/execution/session_state.rs | 67 +++- .../src/execution/session_state_defaults.rs | 7 +- datafusion/core/src/lib.rs | 7 +- datafusion/core/src/test_util/mod.rs | 12 +- .../core/tests/custom_sources_cases/mod.rs | 5 +- .../provider_filter_pushdown.rs | 8 +- .../tests/custom_sources_cases/statistics.rs | 5 +- datafusion/core/tests/memory_limit/mod.rs | 4 +- .../user_defined_table_functions.rs | 6 +- .../tests/cases/roundtrip_logical_plan.rs | 3 +- datafusion/sqllogictest/src/test_context.rs | 7 +- .../custom-table-providers.md | 2 +- 45 files changed, 872 insertions(+), 556 deletions(-) create mode 100644 datafusion/catalog/Cargo.toml create mode 100644 datafusion/catalog/src/catalog.rs create mode 100644 datafusion/catalog/src/lib.rs rename datafusion/{core/src/catalog => catalog/src}/schema.rs (95%) create mode 100644 datafusion/catalog/src/session.rs create mode 100644 datafusion/catalog/src/table.rs rename datafusion/core/src/{catalog => catalog_common}/information_schema.rs (99%) rename datafusion/core/src/{catalog => catalog_common}/listing_schema.rs (98%) rename datafusion/core/src/{catalog => catalog_common}/memory.rs (97%) rename datafusion/core/src/{catalog => catalog_common}/mod.rs (59%) diff --git a/Cargo.toml b/Cargo.toml index c2904da3c106e..cdf3d2f93b93e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ exclude = ["datafusion-cli", "dev/depcheck"] members = [ "datafusion/common", "datafusion/common-runtime", + "datafusion/catalog", "datafusion/core", "datafusion/expr", "datafusion/execution", @@ -88,6 +89,7 @@ chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" datafusion = { path = "datafusion/core", version = "40.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "40.0.0" } datafusion-common = { path = "datafusion/common", version = "40.0.0", default-features = false } datafusion-common-runtime = { path = "datafusion/common-runtime", version = "40.0.0" } datafusion-execution = { path = 
"datafusion/execution", version = "40.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 3dc1db2c90366..89ea13cf5bc7c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1143,6 +1143,7 @@ dependencies = [ "bzip2", "chrono", "dashmap", + "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", @@ -1182,6 +1183,18 @@ dependencies = [ "zstd 0.13.2", ] +[[package]] +name = "datafusion-catalog" +version = "40.0.0" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", +] + [[package]] name = "datafusion-cli" version = "40.0.0" diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index b83f659756105..273eb30d3a710 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -20,8 +20,7 @@ use std::sync::{Arc, Weak}; use crate::object_storage::{get_object_store, AwsOptions, GcpOptions}; -use datafusion::catalog::schema::SchemaProvider; -use datafusion::catalog::{CatalogProvider, CatalogProviderList}; +use datafusion::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider}; use datafusion::common::plan_datafusion_err; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableUrl, @@ -237,7 +236,7 @@ fn substitute_tilde(cur: String) -> String { mod tests { use super::*; - use datafusion::catalog::schema::SchemaProvider; + use datafusion::catalog::SchemaProvider; use datafusion::prelude::SessionContext; fn setup_context() -> (SessionContext, Arc) { diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index 806e2bb39cd4a..a85c43f3576f5 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -22,11 +22,11 @@ use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use async_trait::async_trait; +use datafusion::catalog::Session; use datafusion::common::{plan_err, Column}; use datafusion::datasource::function::TableFunctionImpl; use datafusion::datasource::TableProvider; use datafusion::error::Result; -use datafusion::execution::context::SessionState; use datafusion::logical_expr::Expr; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; @@ -234,7 +234,7 @@ impl TableProvider for ParquetMetadataTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 9bf71e52c3de8..903defafe3ab5 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -19,6 +19,7 @@ use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow_schema::SchemaRef; use async_trait::async_trait; use bytes::Bytes; +use datafusion::catalog::Session; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::parquet::{ ParquetAccessPlan, ParquetExecBuilder, @@ -27,7 +28,6 @@ use datafusion::datasource::physical_plan::{ parquet::ParquetFileReaderFactory, FileMeta, FileScanConfig, }; use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionState; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::parquet::arrow::arrow_reader::{ ArrowReaderOptions, 
ParquetRecordBatchReaderBuilder, RowSelection, RowSelector, @@ -271,7 +271,7 @@ impl IndexTableProvider { /// to a single predicate like `a = 1 AND b = 2` suitable for execution fn filters_to_predicate( &self, - state: &SessionState, + state: &dyn Session, filters: &[Expr], ) -> Result> { let df_schema = DFSchema::try_from(self.schema())?; @@ -463,7 +463,7 @@ impl TableProvider for IndexTableProvider { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs index b9188e1cd5e01..f9ead592c7eac 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/catalog.rs @@ -19,10 +19,7 @@ use async_trait::async_trait; use datafusion::{ arrow::util::pretty, - catalog::{ - schema::SchemaProvider, - {CatalogProvider, CatalogProviderList}, - }, + catalog::{CatalogProvider, CatalogProviderList, SchemaProvider}, datasource::{ file_format::{csv::CsvFormat, FileFormat}, listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index cfb49b0231596..0f7748b133650 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -26,7 +26,7 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::{provider_as_source, TableProvider, TableType}; use datafusion::error::Result; -use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::execution::context::TaskContext; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ project_schema, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, @@ -37,6 +37,7 @@ use datafusion_expr::LogicalPlanBuilder; use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; +use datafusion::catalog::Session; use tokio::time::timeout; /// This example demonstrates executing a simple query against a custom datasource @@ -175,7 +176,7 @@ impl TableProvider for CustomDataSource { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, // filters and limit can be used here to inject some push-down operations if needed _filters: &[Expr], diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index 668eda047444a..91e178f1f1a53 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -23,13 +23,13 @@ use arrow::datatypes::Int32Type; use arrow::util::pretty::pretty_format_batches; use arrow_schema::SchemaRef; use async_trait::async_trait; +use datafusion::catalog::Session; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::{ parquet::StatisticsConverter, {FileScanConfig, ParquetExec}, }; use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionState; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::parquet::arrow::{ arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter, @@ -222,7 +222,7 @@ impl TableProvider for IndexTableProvider { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, diff --git 
a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index c68c21fab169d..fe7f37cc00e39 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -20,10 +20,11 @@ use arrow::csv::ReaderBuilder; use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::Session; use datafusion::datasource::function::TableFunctionImpl; use datafusion::datasource::TableProvider; use datafusion::error::Result; -use datafusion::execution::context::{ExecutionProps, SessionState}; +use datafusion::execution::context::ExecutionProps; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; @@ -35,7 +36,6 @@ use std::fs::File; use std::io::Seek; use std::path::Path; use std::sync::Arc; - // To define your own table function, you only need to do the following 3 things: // 1. Implement your own [`TableProvider`] // 2. Implement your own [`TableFunctionImpl`] and return your [`TableProvider`] @@ -95,7 +95,7 @@ impl TableProvider for LocalCsvTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml new file mode 100644 index 0000000000000..2ebca511c5c87 --- /dev/null +++ b/datafusion/catalog/Cargo.toml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-catalog" +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +arrow-schema = { workspace = true } +async-trait = "0.1.41" +datafusion-common = { workspace = true } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-plan = { workspace = true } + +[lints] +workspace = true diff --git a/datafusion/catalog/src/catalog.rs b/datafusion/catalog/src/catalog.rs new file mode 100644 index 0000000000000..026c3c008f59f --- /dev/null +++ b/datafusion/catalog/src/catalog.rs @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +pub use crate::schema::SchemaProvider; +use datafusion_common::not_impl_err; +use datafusion_common::Result; + +/// Represents a catalog, comprising a number of named schemas. +/// +/// # Catalog Overview +/// +/// To plan and execute queries, DataFusion needs a "Catalog" that provides +/// metadata such as which schemas and tables exist, their columns and data +/// types, and how to access the data. +/// +/// The Catalog API consists: +/// * [`CatalogProviderList`]: a collection of `CatalogProvider`s +/// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems) +/// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) +/// * [`TableProvider]`: individual tables +/// +/// # Implementing Catalogs +/// +/// To implement a catalog, you implement at least one of the [`CatalogProviderList`], +/// [`CatalogProvider`] and [`SchemaProvider`] traits and register them +/// appropriately in the `SessionContext`. +/// +/// DataFusion comes with a simple in-memory catalog implementation, +/// `MemoryCatalogProvider`, that is used by default and has no persistence. +/// DataFusion does not include more complex Catalog implementations because +/// catalog management is a key design choice for most data systems, and thus +/// it is unlikely that any general-purpose catalog implementation will work +/// well across many use cases. +/// +/// # Implementing "Remote" catalogs +/// +/// Sometimes catalog information is stored remotely and requires a network call +/// to retrieve. For example, the [Delta Lake] table format stores table +/// metadata in files on S3 that must be first downloaded to discover what +/// schemas and tables exist. +/// +/// [Delta Lake]: https://delta.io/ +/// +/// The [`CatalogProvider`] can support this use case, but it takes some care. +/// The planning APIs in DataFusion are not `async` and thus network IO can not +/// be performed "lazily" / "on demand" during query planning. The rationale for +/// this design is that using remote procedure calls for all catalog accesses +/// required for query planning would likely result in multiple network calls +/// per plan, resulting in very poor planning performance. +/// +/// To implement [`CatalogProvider`] and [`SchemaProvider`] for remote catalogs, +/// you need to provide an in memory snapshot of the required metadata. Most +/// systems typically either already have this information cached locally or can +/// batch access to the remote catalog to retrieve multiple schemas and tables +/// in a single network call. +/// +/// Note that [`SchemaProvider::table`] is an `async` function in order to +/// simplify implementing simple [`SchemaProvider`]s. For many table formats it +/// is easy to list all available tables but there is additional non trivial +/// access required to read table details (e.g. statistics). 
+/// +/// The pattern that DataFusion itself uses to plan SQL queries is to walk over +/// the query to find all table references, +/// performing required remote catalog in parallel, and then plans the query +/// using that snapshot. +/// +/// # Example Catalog Implementations +/// +/// Here are some examples of how to implement custom catalogs: +/// +/// * [`datafusion-cli`]: [`DynamicFileCatalogProvider`] catalog provider +/// that treats files and directories on a filesystem as tables. +/// +/// * The [`catalog.rs`]: a simple directory based catalog. +/// +/// * [delta-rs]: [`UnityCatalogProvider`] implementation that can +/// read from Delta Lake tables +/// +/// [`datafusion-cli`]: https://datafusion.apache.org/user-guide/cli/index.html +/// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 +/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs +/// [delta-rs]: https://github.com/delta-io/delta-rs +/// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 +/// +/// [`TableProvider]: crate::datasource::TableProvider + +pub trait CatalogProvider: Sync + Send { + /// Returns the catalog provider as [`Any`] + /// so that it can be downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Retrieves the list of available schema names in this catalog. + fn schema_names(&self) -> Vec; + + /// Retrieves a specific schema from the catalog by name, provided it exists. + fn schema(&self, name: &str) -> Option>; + + /// Adds a new schema to this catalog. + /// + /// If a schema of the same name existed before, it is replaced in + /// the catalog and returned. + /// + /// By default returns a "Not Implemented" error + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> Result>> { + // use variables to avoid unused variable warnings + let _ = name; + let _ = schema; + not_impl_err!("Registering new schemas is not supported") + } + + /// Removes a schema from this catalog. Implementations of this method should return + /// errors if the schema exists but cannot be dropped. For example, in DataFusion's + /// default in-memory catalog, `MemoryCatalogProvider`, a non-empty schema + /// will only be successfully dropped when `cascade` is true. + /// This is equivalent to how DROP SCHEMA works in PostgreSQL. + /// + /// Implementations of this method should return None if schema with `name` + /// does not exist. + /// + /// By default returns a "Not Implemented" error + fn deregister_schema( + &self, + _name: &str, + _cascade: bool, + ) -> Result>> { + not_impl_err!("Deregistering new schemas is not supported") + } +} + +/// Represent a list of named [`CatalogProvider`]s. +/// +/// Please see the documentation on `CatalogProvider` for details of +/// implementing a custom catalog. +pub trait CatalogProviderList: Sync + Send { + /// Returns the catalog list as [`Any`] + /// so that it can be downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Adds a new catalog to this catalog list + /// If a catalog of the same name existed before, it is replaced in the list and returned. 
+ fn register_catalog( + &self, + name: String, + catalog: Arc, + ) -> Option>; + + /// Retrieves the list of available catalog names + fn catalog_names(&self) -> Vec; + + /// Retrieves a specific catalog by name, provided it exists. + fn catalog(&self, name: &str) -> Option>; +} diff --git a/datafusion/catalog/src/lib.rs b/datafusion/catalog/src/lib.rs new file mode 100644 index 0000000000000..fe76b5dc9c649 --- /dev/null +++ b/datafusion/catalog/src/lib.rs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod catalog; +mod schema; +mod session; +mod table; + +pub use catalog::*; +pub use schema::*; +pub use session::*; +pub use table::*; diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/catalog/src/schema.rs similarity index 95% rename from datafusion/core/src/catalog/schema.rs rename to datafusion/catalog/src/schema.rs index 7d76b3fa4f197..21bca9fa828dc 100644 --- a/datafusion/core/src/catalog/schema.rs +++ b/datafusion/catalog/src/schema.rs @@ -23,11 +23,8 @@ use datafusion_common::{exec_err, DataFusionError}; use std::any::Any; use std::sync::Arc; -use crate::datasource::TableProvider; -use crate::error::Result; - -// backwards compatibility -pub use super::MemorySchemaProvider; +use crate::table::TableProvider; +use datafusion_common::Result; /// Represents a schema, comprising a number of named tables. /// diff --git a/datafusion/catalog/src/session.rs b/datafusion/catalog/src/session.rs new file mode 100644 index 0000000000000..05d2684ed3e01 --- /dev/null +++ b/datafusion/catalog/src/session.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use async_trait::async_trait; +use datafusion_common::config::ConfigOptions; +use datafusion_common::{DFSchema, Result}; +use datafusion_execution::config::SessionConfig; +use datafusion_execution::runtime_env::RuntimeEnv; +use datafusion_execution::TaskContext; +use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; +use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr}; +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +/// Interface for accessing [`SessionState`] from the catalog. +/// +/// This trait provides access to the information needed to plan and execute +/// queries, such as configuration, functions, and runtime environment. See the +/// documentation on [`SessionState`] for more information. +/// +/// Historically, the `SessionState` struct was passed directly to catalog +/// traits such as [`TableProvider`], which required a direct dependency on the +/// DataFusion core. The interface required is now defined by this trait. See +/// [#10782] for more details. +/// +/// [#10782]: https://github.com/apache/datafusion/issues/10782 +/// +/// # Migration from `SessionState` +/// +/// Using trait methods is preferred, as the implementation may change in future +/// versions. However, you can downcast a `Session` to a `SessionState` as shown +/// in the example below. If you find yourself needing to do this, please open +/// an issue on the DataFusion repository so we can extend the trait to provide +/// the required information. +/// +/// ``` +/// # use datafusion_catalog::Session; +/// # use datafusion_common::{Result, exec_datafusion_err}; +/// # struct SessionState {} +/// // Given a `Session` reference, get the concrete `SessionState` reference +/// // Note: this may stop working in future versions, +/// fn session_state_from_session(session: &dyn Session) -> Result<&SessionState> { +/// session.as_any() +/// .downcast_ref::() +/// .ok_or_else(|| exec_datafusion_err!("Failed to downcast Session to SessionState")) +/// } +/// ``` +/// +/// [`SessionState`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html +/// [`TableProvider`]: crate::TableProvider +#[async_trait] +pub trait Session: Send + Sync { + /// Return the session ID + fn session_id(&self) -> &str; + + /// Return the [`SessionConfig`] + fn config(&self) -> &SessionConfig; + + /// return the [`ConfigOptions`] + fn config_options(&self) -> &ConfigOptions { + self.config().options() + } + + /// Creates a physical [`ExecutionPlan`] plan from a [`LogicalPlan`]. + /// + /// Note: this will optimize the provided plan first. + /// + /// This function will error for [`LogicalPlan`]s such as catalog DDL like + /// `CREATE TABLE`, which do not have corresponding physical plans and must + /// be handled by another layer, typically the `SessionContext`. + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + ) -> Result>; + + /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type + /// coercion, and function rewrites. + /// + /// Note: The expression is not simplified or otherwise optimized: `a = 1 + /// + 2` will not be simplified to `a = 3` as this is a more involved process. + /// See the [expr_api] example for how to simplify expressions. 
+ /// + /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs + fn create_physical_expr( + &self, + expr: Expr, + df_schema: &DFSchema, + ) -> Result>; + + /// Return reference to scalar_functions + fn scalar_functions(&self) -> &HashMap>; + + /// Return reference to aggregate_functions + fn aggregate_functions(&self) -> &HashMap>; + + /// Return reference to window functions + fn window_functions(&self) -> &HashMap>; + + /// Return the runtime env + fn runtime_env(&self) -> &Arc; + + /// Return the execution properties + fn execution_props(&self) -> &ExecutionProps; + + fn as_any(&self) -> &dyn Any; +} + +/// Create a new task context instance from Session +impl From<&dyn Session> for TaskContext { + fn from(state: &dyn Session) -> Self { + let task_id = None; + TaskContext::new( + task_id, + state.session_id().to_string(), + state.config().clone(), + state.scalar_functions().clone(), + state.aggregate_functions().clone(), + state.window_functions().clone(), + state.runtime_env().clone(), + ) + } +} diff --git a/datafusion/catalog/src/table.rs b/datafusion/catalog/src/table.rs new file mode 100644 index 0000000000000..792315642a001 --- /dev/null +++ b/datafusion/catalog/src/table.rs @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::session::Session; +use arrow_schema::SchemaRef; +use async_trait::async_trait; +use datafusion_common::Result; +use datafusion_common::{not_impl_err, Constraints, Statistics}; +use datafusion_expr::{ + CreateExternalTable, Expr, LogicalPlan, TableProviderFilterPushDown, TableType, +}; +use datafusion_physical_plan::ExecutionPlan; + +/// Source table +#[async_trait] +pub trait TableProvider: Sync + Send { + /// Returns the table provider as [`Any`](std::any::Any) so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Get a reference to the schema for this table + fn schema(&self) -> SchemaRef; + + /// Get a reference to the constraints of the table. + /// Returns: + /// - `None` for tables that do not support constraints. + /// - `Some(&Constraints)` for tables supporting constraints. + /// Therefore, a `Some(&Constraints::empty())` return value indicates that + /// this table supports constraints, but there are no constraints. + fn constraints(&self) -> Option<&Constraints> { + None + } + + /// Get the type of this table for metadata/catalog purposes. + fn table_type(&self) -> TableType; + + /// Get the create statement used to create this table, if available. 
+ fn get_table_definition(&self) -> Option<&str> { + None + } + + /// Get the [`LogicalPlan`] of this table, if available + fn get_logical_plan(&self) -> Option<&LogicalPlan> { + None + } + + /// Get the default value for a column, if available. + fn get_column_default(&self, _column: &str) -> Option<&Expr> { + None + } + + /// Create an [`ExecutionPlan`] for scanning the table with optionally + /// specified `projection`, `filter` and `limit`, described below. + /// + /// The `ExecutionPlan` is responsible scanning the datasource's + /// partitions in a streaming, parallelized fashion. + /// + /// # Projection + /// + /// If specified, only a subset of columns should be returned, in the order + /// specified. The projection is a set of indexes of the fields in + /// [`Self::schema`]. + /// + /// DataFusion provides the projection to scan only the columns actually + /// used in the query to improve performance, an optimization called + /// "Projection Pushdown". Some datasources, such as Parquet, can use this + /// information to go significantly faster when only a subset of columns is + /// required. + /// + /// # Filters + /// + /// A list of boolean filter [`Expr`]s to evaluate *during* the scan, in the + /// manner specified by [`Self::supports_filters_pushdown`]. Only rows for + /// which *all* of the `Expr`s evaluate to `true` must be returned (aka the + /// expressions are `AND`ed together). + /// + /// To enable filter pushdown you must override + /// [`Self::supports_filters_pushdown`] as the default implementation does + /// not and `filters` will be empty. + /// + /// DataFusion pushes filtering into the scans whenever possible + /// ("Filter Pushdown"), and depending on the format and the + /// implementation of the format, evaluating the predicate during the scan + /// can increase performance significantly. + /// + /// ## Note: Some columns may appear *only* in Filters + /// + /// In certain cases, a query may only use a certain column in a Filter that + /// has been completely pushed down to the scan. In this case, the + /// projection will not contain all the columns found in the filter + /// expressions. + /// + /// For example, given the query `SELECT t.a FROM t WHERE t.b > 5`, + /// + /// ```text + /// ┌────────────────────┐ + /// │ Projection(t.a) │ + /// └────────────────────┘ + /// ▲ + /// │ + /// │ + /// ┌────────────────────┐ Filter ┌────────────────────┐ Projection ┌────────────────────┐ + /// │ Filter(t.b > 5) │────Pushdown──▶ │ Projection(t.a) │ ───Pushdown───▶ │ Projection(t.a) │ + /// └────────────────────┘ └────────────────────┘ └────────────────────┘ + /// ▲ ▲ ▲ + /// │ │ │ + /// │ │ ┌────────────────────┐ + /// ┌────────────────────┐ ┌────────────────────┐ │ Scan │ + /// │ Scan │ │ Scan │ │ filter=(t.b > 5) │ + /// └────────────────────┘ │ filter=(t.b > 5) │ │ projection=(t.a) │ + /// └────────────────────┘ └────────────────────┘ + /// + /// Initial Plan If `TableProviderFilterPushDown` Projection pushdown notes that + /// returns true, filter pushdown the scan only needs t.a + /// pushes the filter into the scan + /// BUT internally evaluating the + /// predicate still requires t.b + /// ``` + /// + /// # Limit + /// + /// If `limit` is specified, must only produce *at least* this many rows, + /// (though it may return more). Like Projection Pushdown and Filter + /// Pushdown, DataFusion pushes `LIMIT`s as far down in the plan as + /// possible, called "Limit Pushdown" as some sources can use this + /// information to improve their performance. 
Note that if there are any + /// Inexact filters pushed down, the LIMIT cannot be pushed down. This is + /// because inexact filters do not guarantee that every filtered row is + /// removed, so applying the limit could lead to too few rows being available + /// to return as a final result. + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result>; + + /// Specify if DataFusion should provide filter expressions to the + /// TableProvider to apply *during* the scan. + /// + /// Some TableProviders can evaluate filters more efficiently than the + /// `Filter` operator in DataFusion, for example by using an index. + /// + /// # Parameters and Return Value + /// + /// The return `Vec` must have one element for each element of the `filters` + /// argument. The value of each element indicates if the TableProvider can + /// apply the corresponding filter during the scan. The position in the return + /// value corresponds to the expression in the `filters` parameter. + /// + /// If the length of the resulting `Vec` does not match the `filters` input + /// an error will be thrown. + /// + /// Each element in the resulting `Vec` is one of the following: + /// * [`Exact`] or [`Inexact`]: The TableProvider can apply the filter + /// during scan + /// * [`Unsupported`]: The TableProvider cannot apply the filter during scan + /// + /// By default, this function returns [`Unsupported`] for all filters, + /// meaning no filters will be provided to [`Self::scan`]. + /// + /// [`Unsupported`]: TableProviderFilterPushDown::Unsupported + /// [`Exact`]: TableProviderFilterPushDown::Exact + /// [`Inexact`]: TableProviderFilterPushDown::Inexact + /// # Example + /// + /// ```rust + /// # use std::any::Any; + /// # use std::sync::Arc; + /// # use arrow_schema::SchemaRef; + /// # use async_trait::async_trait; + /// # use datafusion_catalog::{TableProvider, Session}; + /// # use datafusion_common::Result; + /// # use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; + /// # use datafusion_physical_plan::ExecutionPlan; + /// // Define a struct that implements the TableProvider trait + /// struct TestDataSource {} + /// + /// #[async_trait] + /// impl TableProvider for TestDataSource { + /// # fn as_any(&self) -> &dyn Any { todo!() } + /// # fn schema(&self) -> SchemaRef { todo!() } + /// # fn table_type(&self) -> TableType { todo!() } + /// # async fn scan(&self, s: &dyn Session, p: Option<&Vec>, f: &[Expr], l: Option) -> Result> { + /// todo!() + /// # } + /// // Override the supports_filters_pushdown to evaluate which expressions + /// // to accept as pushdown predicates. + /// fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result> { + /// // Process each filter + /// let support: Vec<_> = filters.iter().map(|expr| { + /// match expr { + /// // This example only supports a between expr with a single column named "c1". + /// Expr::Between(between_expr) => { + /// between_expr.expr + /// .try_into_col() + /// .map(|column| { + /// if column.name == "c1" { + /// TableProviderFilterPushDown::Exact + /// } else { + /// TableProviderFilterPushDown::Unsupported + /// } + /// }) + /// // If there is no column in the expr set the filter to unsupported. + /// .unwrap_or(TableProviderFilterPushDown::Unsupported) + /// } + /// _ => { + /// // For all other cases return Unsupported. 
+ /// TableProviderFilterPushDown::Unsupported + /// } + /// } + /// }).collect(); + /// Ok(support) + /// } + /// } + /// ``` + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + Ok(vec![ + TableProviderFilterPushDown::Unsupported; + filters.len() + ]) + } + + /// Get statistics for this table, if available + fn statistics(&self) -> Option { + None + } + + /// Return an [`ExecutionPlan`] to insert data into this table, if + /// supported. + /// + /// The returned plan should return a single row in a UInt64 + /// column called "count" such as the following + /// + /// ```text + /// +-------+, + /// | count |, + /// +-------+, + /// | 6 |, + /// +-------+, + /// ``` + /// + /// # See Also + /// + /// See [`DataSinkExec`] for the common pattern of inserting a + /// streams of `RecordBatch`es as files to an ObjectStore. + /// + /// [`DataSinkExec`]: datafusion_physical_plan::insert::DataSinkExec + async fn insert_into( + &self, + _state: &dyn Session, + _input: Arc, + _overwrite: bool, + ) -> Result> { + not_impl_err!("Insert into not implemented for this table") + } +} + +/// A factory which creates [`TableProvider`]s at runtime given a URL. +/// +/// For example, this can be used to create a table "on the fly" +/// from a directory of files only when that name is referenced. +#[async_trait] +pub trait TableProviderFactory: Sync + Send { + /// Create a TableProvider with the given url + async fn create( + &self, + state: &dyn Session, + cmd: &CreateExternalTable, + ) -> Result>; +} diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 98d501794f775..09b90a56d2aaf 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -98,6 +98,7 @@ bytes = { workspace = true } bzip2 = { version = "0.4.3", optional = true } chrono = { workspace = true } dashmap = { workspace = true } +datafusion-catalog = { workspace = true } datafusion-common = { workspace = true, features = ["object_store"] } datafusion-common-runtime = { workspace = true } datafusion-execution = { workspace = true } diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog_common/information_schema.rs similarity index 99% rename from datafusion/core/src/catalog/information_schema.rs rename to datafusion/core/src/catalog_common/information_schema.rs index a79f62e742bef..d086ce900cc37 100644 --- a/datafusion/core/src/catalog/information_schema.rs +++ b/datafusion/core/src/catalog_common/information_schema.rs @@ -29,8 +29,8 @@ use arrow::{ record_batch::RecordBatch, }; +use crate::catalog::{CatalogProviderList, SchemaProvider, TableProvider}; use crate::datasource::streaming::StreamingTable; -use crate::datasource::TableProvider; use crate::execution::context::TaskContext; use crate::logical_expr::TableType; use crate::physical_plan::stream::RecordBatchStreamAdapter; @@ -40,8 +40,6 @@ use crate::{ physical_plan::streaming::PartitionStream, }; -use super::{schema::SchemaProvider, CatalogProviderList}; - pub(crate) const INFORMATION_SCHEMA: &str = "information_schema"; pub(crate) const TABLES: &str = "tables"; pub(crate) const VIEWS: &str = "views"; diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog_common/listing_schema.rs similarity index 98% rename from datafusion/core/src/catalog/listing_schema.rs rename to datafusion/core/src/catalog_common/listing_schema.rs index 373fe788c7218..5b91f963ca244 100644 --- a/datafusion/core/src/catalog/listing_schema.rs +++ 
b/datafusion/core/src/catalog_common/listing_schema.rs @@ -22,9 +22,7 @@ use std::collections::{HashMap, HashSet}; use std::path::Path; use std::sync::{Arc, Mutex}; -use crate::catalog::schema::SchemaProvider; -use crate::datasource::provider::TableProviderFactory; -use crate::datasource::TableProvider; +use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory}; use crate::execution::context::SessionState; use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference}; diff --git a/datafusion/core/src/catalog/memory.rs b/datafusion/core/src/catalog_common/memory.rs similarity index 97% rename from datafusion/core/src/catalog/memory.rs rename to datafusion/core/src/catalog_common/memory.rs index 3af823913a291..6d8bddec45473 100644 --- a/datafusion/core/src/catalog/memory.rs +++ b/datafusion/core/src/catalog_common/memory.rs @@ -18,9 +18,9 @@ //! [`MemoryCatalogProvider`], [`MemoryCatalogProviderList`]: In-memory //! implementations of [`CatalogProviderList`] and [`CatalogProvider`]. -use crate::catalog::schema::SchemaProvider; -use crate::catalog::{CatalogProvider, CatalogProviderList}; -use crate::datasource::TableProvider; +use crate::catalog::{ + CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider, +}; use async_trait::async_trait; use dashmap::DashMap; use datafusion_common::{exec_err, DataFusionError}; @@ -201,11 +201,10 @@ impl SchemaProvider for MemorySchemaProvider { #[cfg(test)] mod test { use super::*; - use crate::catalog::schema::{MemorySchemaProvider, SchemaProvider}; use crate::catalog::CatalogProvider; + use crate::catalog_common::memory::MemorySchemaProvider; use crate::datasource::empty::EmptyTable; use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; - use crate::datasource::TableProvider; use crate::prelude::SessionContext; use arrow_schema::Schema; use datafusion_common::assert_batches_eq; diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog_common/mod.rs similarity index 59% rename from datafusion/core/src/catalog/mod.rs rename to datafusion/core/src/catalog_common/mod.rs index fc50b4214d6dc..b8414378862e4 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -17,11 +17,6 @@ //! Interfaces and default implementations of catalogs and schemas. //! -//! Traits: -//! * [`CatalogProviderList`]: a collection of `CatalogProvider`s -//! * [`CatalogProvider`]: a collection of [`SchemaProvider`]s (sometimes called a "database" in other systems) -//! * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) -//! //! Implementations //! * Simple memory based catalog: [`MemoryCatalogProviderList`], [`MemoryCatalogProvider`], [`MemorySchemaProvider`] //! * Information schema: [`information_schema`] @@ -29,180 +24,22 @@ pub mod information_schema; pub mod listing_schema; -mod memory; -pub mod schema; +pub mod memory; +pub use crate::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider}; pub use memory::{ MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider, }; -pub use schema::SchemaProvider; pub use datafusion_sql::{ResolvedTableReference, TableReference}; -use datafusion_common::{not_impl_err, Result}; -use std::any::Any; use std::collections::BTreeSet; use std::ops::ControlFlow; -use std::sync::Arc; - -/// Represent a list of named [`CatalogProvider`]s. -/// -/// Please see the documentation on `CatalogProvider` for details of -/// implementing a custom catalog. 
-pub trait CatalogProviderList: Sync + Send { - /// Returns the catalog list as [`Any`] - /// so that it can be downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// Adds a new catalog to this catalog list - /// If a catalog of the same name existed before, it is replaced in the list and returned. - fn register_catalog( - &self, - name: String, - catalog: Arc, - ) -> Option>; - - /// Retrieves the list of available catalog names - fn catalog_names(&self) -> Vec; - - /// Retrieves a specific catalog by name, provided it exists. - fn catalog(&self, name: &str) -> Option>; -} /// See [`CatalogProviderList`] #[deprecated(since = "35.0.0", note = "use [`CatalogProviderList`] instead")] pub trait CatalogList: CatalogProviderList {} -/// Represents a catalog, comprising a number of named schemas. -/// -/// # Catalog Overview -/// -/// To plan and execute queries, DataFusion needs a "Catalog" that provides -/// metadata such as which schemas and tables exist, their columns and data -/// types, and how to access the data. -/// -/// The Catalog API consists: -/// * [`CatalogProviderList`]: a collection of `CatalogProvider`s -/// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems) -/// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) -/// * [`TableProvider]`: individual tables -/// -/// # Implementing Catalogs -/// -/// To implement a catalog, you implement at least one of the [`CatalogProviderList`], -/// [`CatalogProvider`] and [`SchemaProvider`] traits and register them -/// appropriately the [`SessionContext`]. -/// -/// [`SessionContext`]: crate::execution::context::SessionContext -/// -/// DataFusion comes with a simple in-memory catalog implementation, -/// [`MemoryCatalogProvider`], that is used by default and has no persistence. -/// DataFusion does not include more complex Catalog implementations because -/// catalog management is a key design choice for most data systems, and thus -/// it is unlikely that any general-purpose catalog implementation will work -/// well across many use cases. -/// -/// # Implementing "Remote" catalogs -/// -/// Sometimes catalog information is stored remotely and requires a network call -/// to retrieve. For example, the [Delta Lake] table format stores table -/// metadata in files on S3 that must be first downloaded to discover what -/// schemas and tables exist. -/// -/// [Delta Lake]: https://delta.io/ -/// -/// The [`CatalogProvider`] can support this use case, but it takes some care. -/// The planning APIs in DataFusion are not `async` and thus network IO can not -/// be performed "lazily" / "on demand" during query planning. The rationale for -/// this design is that using remote procedure calls for all catalog accesses -/// required for query planning would likely result in multiple network calls -/// per plan, resulting in very poor planning performance. -/// -/// To implement [`CatalogProvider`] and [`SchemaProvider`] for remote catalogs, -/// you need to provide an in memory snapshot of the required metadata. Most -/// systems typically either already have this information cached locally or can -/// batch access to the remote catalog to retrieve multiple schemas and tables -/// in a single network call. -/// -/// Note that [`SchemaProvider::table`] is an `async` function in order to -/// simplify implementing simple [`SchemaProvider`]s. 
For many table formats it -/// is easy to list all available tables but there is additional non trivial -/// access required to read table details (e.g. statistics). -/// -/// The pattern that DataFusion itself uses to plan SQL queries is to walk over -/// the query to [find all table references], -/// performing required remote catalog in parallel, and then plans the query -/// using that snapshot. -/// -/// [find all table references]: resolve_table_references -/// -/// # Example Catalog Implementations -/// -/// Here are some examples of how to implement custom catalogs: -/// -/// * [`datafusion-cli`]: [`DynamicFileCatalogProvider`] catalog provider -/// that treats files and directories on a filesystem as tables. -/// -/// * The [`catalog.rs`]: a simple directory based catalog. -/// -/// * [delta-rs]: [`UnityCatalogProvider`] implementation that can -/// read from Delta Lake tables -/// -/// [`datafusion-cli`]: https://datafusion.apache.org/user-guide/cli/index.html -/// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75 -/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs -/// [delta-rs]: https://github.com/delta-io/delta-rs -/// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 -/// -/// [`TableProvider]: crate::datasource::TableProvider - -pub trait CatalogProvider: Sync + Send { - /// Returns the catalog provider as [`Any`] - /// so that it can be downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// Retrieves the list of available schema names in this catalog. - fn schema_names(&self) -> Vec; - - /// Retrieves a specific schema from the catalog by name, provided it exists. - fn schema(&self, name: &str) -> Option>; - - /// Adds a new schema to this catalog. - /// - /// If a schema of the same name existed before, it is replaced in - /// the catalog and returned. - /// - /// By default returns a "Not Implemented" error - fn register_schema( - &self, - name: &str, - schema: Arc, - ) -> Result>> { - // use variables to avoid unused variable warnings - let _ = name; - let _ = schema; - not_impl_err!("Registering new schemas is not supported") - } - - /// Removes a schema from this catalog. Implementations of this method should return - /// errors if the schema exists but cannot be dropped. For example, in DataFusion's - /// default in-memory catalog, [`MemoryCatalogProvider`], a non-empty schema - /// will only be successfully dropped when `cascade` is true. - /// This is equivalent to how DROP SCHEMA works in PostgreSQL. - /// - /// Implementations of this method should return None if schema with `name` - /// does not exist. - /// - /// By default returns a "Not Implemented" error - fn deregister_schema( - &self, - _name: &str, - _cascade: bool, - ) -> Result>> { - not_impl_err!("Deregistering new schemas is not supported") - } -} - /// Collects all tables and views referenced in the SQL statement. CTEs are collected separately. /// This can be used to determine which tables need to be in the catalog for a query to be planned. 
/// @@ -215,7 +52,7 @@ pub trait CatalogProvider: Sync + Send { /// /// ``` /// # use datafusion_sql::parser::DFParser; -/// # use datafusion::catalog::resolve_table_references; +/// # use datafusion::catalog_common::resolve_table_references; /// let query = "SELECT a FROM foo where x IN (SELECT y FROM bar)"; /// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); /// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); @@ -229,7 +66,7 @@ pub trait CatalogProvider: Sync + Send { /// /// ``` /// # use datafusion_sql::parser::DFParser; -/// # use datafusion::catalog::resolve_table_references; +/// # use datafusion::catalog_common::resolve_table_references; /// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;"; /// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); /// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index e1021d06261f2..8feccfb43d6b4 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -58,6 +58,7 @@ use datafusion_expr::{ use datafusion_functions_aggregate::expr_fn::{avg, count, median, stddev, sum}; use async_trait::async_trait; +use datafusion_catalog::Session; /// Contains options that control how data is /// written out from a DataFrame @@ -1657,7 +1658,7 @@ impl TableProvider for DataFrameTableProvider { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs index afc4536f068e2..d7d224828dda0 100644 --- a/datafusion/core/src/datasource/cte_worktable.rs +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -22,6 +22,7 @@ use std::sync::Arc; use arrow::datatypes::SchemaRef; use async_trait::async_trait; +use datafusion_catalog::Session; use datafusion_physical_plan::work_table::WorkTableExec; use crate::{ @@ -31,7 +32,6 @@ use crate::{ }; use crate::datasource::{TableProvider, TableType}; -use crate::execution::context::SessionState; /// The temporary working table where the previous iteration of a recursive query is stored /// Naming is based on PostgreSQL's implementation. 
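The hunks that follow mechanically update every `scan`, `insert_into`, and `create` implementation from `&SessionState` to `&dyn Session`. As a reference point, here is a minimal sketch (not part of the patch) of what a third-party `TableProvider` looks like against the new signature; `InMemoryRows` and its fields are hypothetical, and `MemoryExec` is used only as a convenient stand-in execution plan. Because the argument is now a trait object, such a provider can live entirely outside `datafusion-core`.

```rust
use std::any::Any;
use std::sync::Arc;

use arrow_array::RecordBatch;
use arrow_schema::SchemaRef;
use async_trait::async_trait;
use datafusion_catalog::{Session, TableProvider};
use datafusion_common::Result;
use datafusion_expr::{Expr, TableType};
use datafusion_physical_plan::memory::MemoryExec;
use datafusion_physical_plan::ExecutionPlan;

/// A toy provider over in-memory batches (hypothetical example type).
struct InMemoryRows {
    schema: SchemaRef,
    batches: Vec<RecordBatch>,
}

#[async_trait]
impl TableProvider for InMemoryRows {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }

    fn table_type(&self) -> TableType {
        TableType::Base
    }

    async fn scan(
        &self,
        _state: &dyn Session, // any `Session` impl, not only `SessionState`
        projection: Option<&Vec<usize>>,
        _filters: &[Expr],
        _limit: Option<usize>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        // MemoryExec applies the projection to the stored batches for us.
        let exec = MemoryExec::try_new(
            &[self.batches.clone()],
            self.schema(),
            projection.cloned(),
        )?;
        Ok(Arc::new(exec))
    }
}
```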
@@ -77,7 +77,7 @@ impl TableProvider for CteWorkTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, _projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs index 5100987520ee1..d831dd0060318 100644 --- a/datafusion/core/src/datasource/empty.rs +++ b/datafusion/core/src/datasource/empty.rs @@ -22,11 +22,11 @@ use std::sync::Arc; use arrow::datatypes::*; use async_trait::async_trait; +use datafusion_catalog::Session; use datafusion_common::project_schema; use crate::datasource::{TableProvider, TableType}; use crate::error::Result; -use crate::execution::context::SessionState; use crate::logical_expr::Expr; use crate::physical_plan::{empty::EmptyExec, ExecutionPlan}; @@ -69,7 +69,7 @@ impl TableProvider for EmptyTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 500f20af474f9..7154b50b9dd9e 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -73,7 +73,7 @@ pub trait FileFormatFactory: Sync + Send + GetExt + Debug { /// from the [`TableProvider`]. This helps code re-utilization across /// providers that support the same file formats. /// -/// [`TableProvider`]: crate::datasource::provider::TableProvider +/// [`TableProvider`]: crate::catalog::TableProvider #[async_trait] pub trait FileFormat: Send + Sync + fmt::Debug { /// Returns the table provider as [`Any`](std::any::Any) so that it can be diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index b91a4bd09c550..3af4d41bcf037 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -24,9 +24,8 @@ use std::{any::Any, sync::Arc}; use super::helpers::{expr_applicable_for_cols, pruned_partition_list, split_files}; use super::PartitionedFile; -use crate::datasource::{ - create_ordering, get_statistics_with_limit, TableProvider, TableType, -}; +use crate::catalog::TableProvider; +use crate::datasource::{create_ordering, get_statistics_with_limit, TableType}; use crate::datasource::{ file_format::{file_compression_type::FileCompressionType, FileFormat}, listing::ListingTableUrl, @@ -52,6 +51,7 @@ use datafusion_physical_expr::{ }; use async_trait::async_trait; +use datafusion_catalog::Session; use futures::{future, stream, StreamExt, TryStreamExt}; use itertools::Itertools; use object_store::ObjectStore; @@ -736,13 +736,16 @@ impl TableProvider for ListingTable { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, ) -> Result> { - let (mut partitioned_file_lists, statistics) = - self.list_files_for_scan(state, filters, limit).await?; + // TODO remove downcast_ref from here? 
+ let session_state = state.as_any().downcast_ref::().unwrap(); + let (mut partitioned_file_lists, statistics) = self + .list_files_for_scan(session_state, filters, limit) + .await?; // if no files need to be read, return an `EmptyExec` if partitioned_file_lists.is_empty() { @@ -805,7 +808,7 @@ impl TableProvider for ListingTable { self.options .format .create_physical_plan( - state, + session_state, FileScanConfig::new(object_store_url, Arc::clone(&self.file_schema)) .with_file_groups(partitioned_file_lists) .with_statistics(statistics) @@ -852,7 +855,7 @@ impl TableProvider for ListingTable { async fn insert_into( &self, - state: &SessionState, + state: &dyn Session, input: Arc, overwrite: bool, ) -> Result> { @@ -878,8 +881,10 @@ impl TableProvider for ListingTable { // Get the object store for the table path. let store = state.runtime_env().object_store(table_path)?; + // TODO remove downcast_ref from here? + let session_state = state.as_any().downcast_ref::().unwrap(); let file_list_stream = pruned_partition_list( - state, + session_state, store.as_ref(), table_path, &[], @@ -890,7 +895,7 @@ impl TableProvider for ListingTable { let file_groups = file_list_stream.try_collect::>().await?; let keep_partition_by_columns = - state.config().options().execution.keep_partition_by_columns; + state.config_options().execution.keep_partition_by_columns; // Sink related option, apart from format let config = FileSinkConfig { @@ -926,7 +931,7 @@ impl TableProvider for ListingTable { self.options() .format - .create_writer_physical_plan(input, state, config, order_requirements) + .create_writer_physical_plan(input, session_state, config, order_requirements) .await } diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 1d4d08481895b..ce52dd98166e2 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -20,11 +20,10 @@ use std::path::Path; use std::sync::Arc; +use crate::catalog::{TableProvider, TableProviderFactory}; use crate::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; -use crate::datasource::provider::TableProviderFactory; -use crate::datasource::TableProvider; use crate::execution::context::SessionState; use arrow::datatypes::{DataType, SchemaRef}; @@ -33,6 +32,7 @@ use datafusion_common::{config_datafusion_err, Result}; use datafusion_expr::CreateExternalTable; use async_trait::async_trait; +use datafusion_catalog::Session; /// A `TableProviderFactory` capable of creating new `ListingTable`s #[derive(Debug, Default)] @@ -49,16 +49,18 @@ impl ListingTableFactory { impl TableProviderFactory for ListingTableFactory { async fn create( &self, - state: &SessionState, + state: &dyn Session, cmd: &CreateExternalTable, ) -> Result> { - let file_format = state + // TODO remove downcast_ref from here. Should file format factory be an extension to session state? + let session_state = state.as_any().downcast_ref::().unwrap(); + let file_format = session_state .get_file_format_factory(cmd.file_type.as_str()) .ok_or(config_datafusion_err!( "Unable to create table with format {}! Could not find FileFormat.", cmd.file_type ))? 
- .create(state, &cmd.options)?; + .create(session_state, &cmd.options)?; let file_extension = get_extension(cmd.location.as_str()); @@ -114,10 +116,12 @@ impl TableProviderFactory for ListingTableFactory { .with_table_partition_cols(table_partition_cols) .with_file_sort_order(cmd.order_exprs.clone()); - options.validate_partitions(state, &table_path).await?; + options + .validate_partitions(session_state, &table_path) + .await?; let resolved_schema = match provided_schema { - None => options.infer_schema(state, &table_path).await?, + None => options.infer_schema(session_state, &table_path).await?, Some(s) => s, }; let config = ListingTableConfig::new(table_path) diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index 5c49282095598..44e01e71648a0 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -42,6 +42,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; +use datafusion_catalog::Session; use futures::StreamExt; use log::debug; use parking_lot::Mutex; @@ -206,7 +207,7 @@ impl TableProvider for MemTable { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, @@ -258,7 +259,7 @@ impl TableProvider for MemTable { /// * A plan that returns the number of rows written. async fn insert_into( &self, - _state: &SessionState, + _state: &dyn Session, input: Arc, overwrite: bool, ) -> Result> { diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index c28788eed4582..1c9924735735d 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -43,8 +43,8 @@ pub use self::default_table_source::{ provider_as_source, source_as_provider, DefaultTableSource, }; pub use self::memory::MemTable; -pub use self::provider::TableProvider; pub use self::view::ViewTable; +pub use crate::catalog::TableProvider; pub use crate::logical_expr::TableType; pub use statistics::get_statistics_with_limit; diff --git a/datafusion/core/src/datasource/provider.rs b/datafusion/core/src/datasource/provider.rs index 7c58aded31081..9d4b67632a019 100644 --- a/datafusion/core/src/datasource/provider.rs +++ b/datafusion/core/src/datasource/provider.rs @@ -17,285 +17,17 @@ //! Data source traits -use std::any::Any; use std::sync::Arc; use async_trait::async_trait; -use datafusion_common::{not_impl_err, Constraints, Statistics}; -use datafusion_expr::{CreateExternalTable, LogicalPlan}; +use datafusion_catalog::Session; +use datafusion_expr::CreateExternalTable; pub use datafusion_expr::{TableProviderFilterPushDown, TableType}; -use crate::arrow::datatypes::SchemaRef; +use crate::catalog::{TableProvider, TableProviderFactory}; use crate::datasource::listing_table_factory::ListingTableFactory; use crate::datasource::stream::StreamTableFactory; use crate::error::Result; -use crate::execution::context::SessionState; -use crate::logical_expr::Expr; -use crate::physical_plan::ExecutionPlan; - -/// Source table -#[async_trait] -pub trait TableProvider: Sync + Send { - /// Returns the table provider as [`Any`](std::any::Any) so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// Get a reference to the schema for this table - fn schema(&self) -> SchemaRef; - - /// Get a reference to the constraints of the table. 
- /// Returns: - /// - `None` for tables that do not support constraints. - /// - `Some(&Constraints)` for tables supporting constraints. - /// Therefore, a `Some(&Constraints::empty())` return value indicates that - /// this table supports constraints, but there are no constraints. - fn constraints(&self) -> Option<&Constraints> { - None - } - - /// Get the type of this table for metadata/catalog purposes. - fn table_type(&self) -> TableType; - - /// Get the create statement used to create this table, if available. - fn get_table_definition(&self) -> Option<&str> { - None - } - - /// Get the [`LogicalPlan`] of this table, if available - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - None - } - - /// Get the default value for a column, if available. - fn get_column_default(&self, _column: &str) -> Option<&Expr> { - None - } - - /// Create an [`ExecutionPlan`] for scanning the table with optionally - /// specified `projection`, `filter` and `limit`, described below. - /// - /// The `ExecutionPlan` is responsible scanning the datasource's - /// partitions in a streaming, parallelized fashion. - /// - /// # Projection - /// - /// If specified, only a subset of columns should be returned, in the order - /// specified. The projection is a set of indexes of the fields in - /// [`Self::schema`]. - /// - /// DataFusion provides the projection to scan only the columns actually - /// used in the query to improve performance, an optimization called - /// "Projection Pushdown". Some datasources, such as Parquet, can use this - /// information to go significantly faster when only a subset of columns is - /// required. - /// - /// # Filters - /// - /// A list of boolean filter [`Expr`]s to evaluate *during* the scan, in the - /// manner specified by [`Self::supports_filters_pushdown`]. Only rows for - /// which *all* of the `Expr`s evaluate to `true` must be returned (aka the - /// expressions are `AND`ed together). - /// - /// To enable filter pushdown you must override - /// [`Self::supports_filters_pushdown`] as the default implementation does - /// not and `filters` will be empty. - /// - /// DataFusion pushes filtering into the scans whenever possible - /// ("Filter Pushdown"), and depending on the format and the - /// implementation of the format, evaluating the predicate during the scan - /// can increase performance significantly. - /// - /// ## Note: Some columns may appear *only* in Filters - /// - /// In certain cases, a query may only use a certain column in a Filter that - /// has been completely pushed down to the scan. In this case, the - /// projection will not contain all the columns found in the filter - /// expressions. 
- /// - /// For example, given the query `SELECT t.a FROM t WHERE t.b > 5`, - /// - /// ```text - /// ┌────────────────────┐ - /// │ Projection(t.a) │ - /// └────────────────────┘ - /// ▲ - /// │ - /// │ - /// ┌────────────────────┐ Filter ┌────────────────────┐ Projection ┌────────────────────┐ - /// │ Filter(t.b > 5) │────Pushdown──▶ │ Projection(t.a) │ ───Pushdown───▶ │ Projection(t.a) │ - /// └────────────────────┘ └────────────────────┘ └────────────────────┘ - /// ▲ ▲ ▲ - /// │ │ │ - /// │ │ ┌────────────────────┐ - /// ┌────────────────────┐ ┌────────────────────┐ │ Scan │ - /// │ Scan │ │ Scan │ │ filter=(t.b > 5) │ - /// └────────────────────┘ │ filter=(t.b > 5) │ │ projection=(t.a) │ - /// └────────────────────┘ └────────────────────┘ - /// - /// Initial Plan If `TableProviderFilterPushDown` Projection pushdown notes that - /// returns true, filter pushdown the scan only needs t.a - /// pushes the filter into the scan - /// BUT internally evaluating the - /// predicate still requires t.b - /// ``` - /// - /// # Limit - /// - /// If `limit` is specified, must only produce *at least* this many rows, - /// (though it may return more). Like Projection Pushdown and Filter - /// Pushdown, DataFusion pushes `LIMIT`s as far down in the plan as - /// possible, called "Limit Pushdown" as some sources can use this - /// information to improve their performance. Note that if there are any - /// Inexact filters pushed down, the LIMIT cannot be pushed down. This is - /// because inexact filters do not guarantee that every filtered row is - /// removed, so applying the limit could lead to too few rows being available - /// to return as a final result. - async fn scan( - &self, - state: &SessionState, - projection: Option<&Vec>, - filters: &[Expr], - limit: Option, - ) -> Result>; - - /// Specify if DataFusion should provide filter expressions to the - /// TableProvider to apply *during* the scan. - /// - /// Some TableProviders can evaluate filters more efficiently than the - /// `Filter` operator in DataFusion, for example by using an index. - /// - /// # Parameters and Return Value - /// - /// The return `Vec` must have one element for each element of the `filters` - /// argument. The value of each element indicates if the TableProvider can - /// apply the corresponding filter during the scan. The position in the return - /// value corresponds to the expression in the `filters` parameter. - /// - /// If the length of the resulting `Vec` does not match the `filters` input - /// an error will be thrown. - /// - /// Each element in the resulting `Vec` is one of the following: - /// * [`Exact`] or [`Inexact`]: The TableProvider can apply the filter - /// during scan - /// * [`Unsupported`]: The TableProvider cannot apply the filter during scan - /// - /// By default, this function returns [`Unsupported`] for all filters, - /// meaning no filters will be provided to [`Self::scan`]. 
- /// - /// [`Unsupported`]: TableProviderFilterPushDown::Unsupported - /// [`Exact`]: TableProviderFilterPushDown::Exact - /// [`Inexact`]: TableProviderFilterPushDown::Inexact - /// # Example - /// - /// ```rust - /// # use std::any::Any; - /// # use std::sync::Arc; - /// # use arrow_schema::SchemaRef; - /// # use async_trait::async_trait; - /// # use datafusion::datasource::TableProvider; - /// # use datafusion::error::{Result, DataFusionError}; - /// # use datafusion::execution::context::SessionState; - /// # use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; - /// # use datafusion_physical_plan::ExecutionPlan; - /// // Define a struct that implements the TableProvider trait - /// struct TestDataSource {} - /// - /// #[async_trait] - /// impl TableProvider for TestDataSource { - /// # fn as_any(&self) -> &dyn Any { todo!() } - /// # fn schema(&self) -> SchemaRef { todo!() } - /// # fn table_type(&self) -> TableType { todo!() } - /// # async fn scan(&self, s: &SessionState, p: Option<&Vec>, f: &[Expr], l: Option) -> Result> { - /// todo!() - /// # } - /// // Override the supports_filters_pushdown to evaluate which expressions - /// // to accept as pushdown predicates. - /// fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result> { - /// // Process each filter - /// let support: Vec<_> = filters.iter().map(|expr| { - /// match expr { - /// // This example only supports a between expr with a single column named "c1". - /// Expr::Between(between_expr) => { - /// between_expr.expr - /// .try_into_col() - /// .map(|column| { - /// if column.name == "c1" { - /// TableProviderFilterPushDown::Exact - /// } else { - /// TableProviderFilterPushDown::Unsupported - /// } - /// }) - /// // If there is no column in the expr set the filter to unsupported. - /// .unwrap_or(TableProviderFilterPushDown::Unsupported) - /// } - /// _ => { - /// // For all other cases return Unsupported. - /// TableProviderFilterPushDown::Unsupported - /// } - /// } - /// }).collect(); - /// Ok(support) - /// } - /// } - /// ``` - fn supports_filters_pushdown( - &self, - filters: &[&Expr], - ) -> Result> { - Ok(vec![ - TableProviderFilterPushDown::Unsupported; - filters.len() - ]) - } - - /// Get statistics for this table, if available - fn statistics(&self) -> Option { - None - } - - /// Return an [`ExecutionPlan`] to insert data into this table, if - /// supported. - /// - /// The returned plan should return a single row in a UInt64 - /// column called "count" such as the following - /// - /// ```text - /// +-------+, - /// | count |, - /// +-------+, - /// | 6 |, - /// +-------+, - /// ``` - /// - /// # See Also - /// - /// See [`DataSinkExec`] for the common pattern of inserting a - /// streams of `RecordBatch`es as files to an ObjectStore. - /// - /// [`DataSinkExec`]: crate::physical_plan::insert::DataSinkExec - async fn insert_into( - &self, - _state: &SessionState, - _input: Arc, - _overwrite: bool, - ) -> Result> { - not_impl_err!("Insert into not implemented for this table") - } -} - -/// A factory which creates [`TableProvider`]s at runtime given a URL. -/// -/// For example, this can be used to create a table "on the fly" -/// from a directory of files only when that name is referenced. 
-#[async_trait] -pub trait TableProviderFactory: Sync + Send { - /// Create a TableProvider with the given url - async fn create( - &self, - state: &SessionState, - cmd: &CreateExternalTable, - ) -> Result>; -} /// The default [`TableProviderFactory`] /// @@ -318,7 +50,7 @@ impl DefaultTableFactory { impl TableProviderFactory for DefaultTableFactory { async fn create( &self, - state: &SessionState, + state: &dyn Session, cmd: &CreateExternalTable, ) -> Result> { let mut unbounded = cmd.unbounded; diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 9cfdb7bb1168f..682565aea9096 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -25,9 +25,8 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; -use crate::datasource::provider::TableProviderFactory; -use crate::datasource::{create_ordering, TableProvider}; -use crate::execution::context::SessionState; +use crate::catalog::{TableProvider, TableProviderFactory}; +use crate::datasource::create_ordering; use arrow_array::{RecordBatch, RecordBatchReader, RecordBatchWriter}; use arrow_schema::SchemaRef; @@ -42,6 +41,7 @@ use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use async_trait::async_trait; +use datafusion_catalog::Session; use futures::StreamExt; /// A [`TableProviderFactory`] for [`StreamTable`] @@ -52,7 +52,7 @@ pub struct StreamTableFactory {} impl TableProviderFactory for StreamTableFactory { async fn create( &self, - state: &SessionState, + state: &dyn Session, cmd: &CreateExternalTable, ) -> Result> { let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into()); @@ -322,7 +322,7 @@ impl TableProvider for StreamTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], limit: Option, @@ -347,7 +347,7 @@ impl TableProvider for StreamTable { async fn insert_into( &self, - _state: &SessionState, + _state: &dyn Session, input: Arc, _overwrite: bool, ) -> Result> { diff --git a/datafusion/core/src/datasource/streaming.rs b/datafusion/core/src/datasource/streaming.rs index 205faee43334f..f9ded357b5a55 100644 --- a/datafusion/core/src/datasource/streaming.rs +++ b/datafusion/core/src/datasource/streaming.rs @@ -23,14 +23,13 @@ use std::sync::Arc; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion_common::{plan_err, Result}; -use datafusion_expr::{Expr, TableType}; -use log::debug; - use crate::datasource::TableProvider; -use crate::execution::context::SessionState; use crate::physical_plan::streaming::{PartitionStream, StreamingTableExec}; use crate::physical_plan::ExecutionPlan; +use datafusion_catalog::Session; +use datafusion_common::{plan_err, Result}; +use datafusion_expr::{Expr, TableType}; +use log::debug; /// A [`TableProvider`] that streams a set of [`PartitionStream`] pub struct StreamingTable { @@ -85,7 +84,7 @@ impl TableProvider for StreamingTable { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], limit: Option, diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 3f024a6b4cb71..98d118c027b7a 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -21,6 +21,7 @@ use std::{any::Any, sync::Arc}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; +use 
datafusion_catalog::Session; use datafusion_common::Column; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; @@ -31,7 +32,6 @@ use crate::{ }; use crate::datasource::{TableProvider, TableType}; -use crate::execution::context::SessionState; /// An implementation of `TableProvider` that uses another logical plan. pub struct ViewTable { @@ -103,7 +103,7 @@ impl TableProvider for ViewTable { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option, diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 18db4dc8eb0a1..9b889c37ab522 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -23,16 +23,18 @@ use std::sync::{Arc, Weak}; use super::options::ReadOptions; use crate::{ - catalog::listing_schema::ListingSchemaProvider, - catalog::schema::MemorySchemaProvider, - catalog::{CatalogProvider, CatalogProviderList, MemoryCatalogProvider}, + catalog::{ + CatalogProvider, CatalogProviderList, TableProvider, TableProviderFactory, + }, + catalog_common::listing_schema::ListingSchemaProvider, + catalog_common::memory::MemorySchemaProvider, + catalog_common::MemoryCatalogProvider, dataframe::DataFrame, datasource::{ function::{TableFunction, TableFunctionImpl}, listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, - provider::TableProviderFactory, }, - datasource::{provider_as_source, MemTable, TableProvider, ViewTable}, + datasource::{provider_as_source, MemTable, ViewTable}, error::{DataFusionError, Result}, execution::{options::ArrowReadOptions, runtime_env::RuntimeEnv, FunctionRegistry}, logical_expr::AggregateUDF, @@ -1579,7 +1581,7 @@ mod tests { use datafusion_common_runtime::SpawnedTask; - use crate::catalog::schema::SchemaProvider; + use crate::catalog::SchemaProvider; use crate::execution::session_state::SessionStateBuilder; use crate::physical_planner::PhysicalPlanner; use async_trait::async_trait; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index cc2b44cf1933b..226e8085341e5 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -17,13 +17,14 @@ //! 
[`SessionState`]: information required to run queries in a session -use crate::catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA}; -use crate::catalog::schema::SchemaProvider; -use crate::catalog::{CatalogProviderList, MemoryCatalogProviderList}; +use crate::catalog::{CatalogProviderList, SchemaProvider, TableProviderFactory}; +use crate::catalog_common::information_schema::{ + InformationSchemaProvider, INFORMATION_SCHEMA, +}; +use crate::catalog_common::MemoryCatalogProviderList; use crate::datasource::cte_worktable::CteWorkTable; use crate::datasource::file_format::{format_as_file_type, FileFormatFactory}; use crate::datasource::function::{TableFunction, TableFunctionImpl}; -use crate::datasource::provider::TableProviderFactory; use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; @@ -32,6 +33,7 @@ use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; use arrow_schema::{DataType, SchemaRef}; use async_trait::async_trait; use chrono::{DateTime, Utc}; +use datafusion_catalog::Session; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; @@ -68,6 +70,7 @@ use itertools::Itertools; use log::{debug, info}; use sqlparser::ast::Expr as SQLExpr; use sqlparser::dialect::dialect_from_str; +use std::any::Any; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; @@ -144,7 +147,7 @@ pub struct SessionState { /// `CREATE EXTERNAL TABLE ... STORED AS ` for custom file /// formats other than those built into DataFusion /// - /// [`TableProvider`]: crate::datasource::provider::TableProvider + /// [`TableProvider`]: crate::catalog::TableProvider table_factories: HashMap>, /// Runtime environment runtime_env: Arc, @@ -180,6 +183,56 @@ impl Debug for SessionState { } } +#[async_trait] +impl Session for SessionState { + fn session_id(&self) -> &str { + self.session_id() + } + + fn config(&self) -> &SessionConfig { + self.config() + } + + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + ) -> datafusion_common::Result> { + self.create_physical_plan(logical_plan).await + } + + fn create_physical_expr( + &self, + expr: Expr, + df_schema: &DFSchema, + ) -> datafusion_common::Result> { + self.create_physical_expr(expr, df_schema) + } + + fn scalar_functions(&self) -> &HashMap> { + self.scalar_functions() + } + + fn aggregate_functions(&self) -> &HashMap> { + self.aggregate_functions() + } + + fn window_functions(&self) -> &HashMap> { + self.window_functions() + } + + fn runtime_env(&self) -> &Arc { + self.runtime_env() + } + + fn execution_props(&self) -> &ExecutionProps { + self.execution_props() + } + + fn as_any(&self) -> &dyn Any { + self + } +} + impl SessionState { /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. @@ -465,14 +518,14 @@ impl SessionState { /// /// See [`catalog::resolve_table_references`] for more information. 
/// - /// [`catalog::resolve_table_references`]: crate::catalog::resolve_table_references + /// [`catalog::resolve_table_references`]: crate::catalog_common::resolve_table_references pub fn resolve_table_references( &self, statement: &datafusion_sql::parser::Statement, ) -> datafusion_common::Result> { let enable_ident_normalization = self.config.options().sql_parser.enable_ident_normalization; - let (table_refs, _) = crate::catalog::resolve_table_references( + let (table_refs, _) = crate::catalog_common::resolve_table_references( statement, enable_ident_normalization, )?; diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index b7e7b5f0955f1..07420afe842f7 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -15,8 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::catalog::listing_schema::ListingSchemaProvider; -use crate::catalog::{CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider}; +use crate::catalog::{CatalogProvider, TableProviderFactory}; +use crate::catalog_common::listing_schema::ListingSchemaProvider; +use crate::catalog_common::{MemoryCatalogProvider, MemorySchemaProvider}; use crate::datasource::file_format::arrow::ArrowFormatFactory; use crate::datasource::file_format::avro::AvroFormatFactory; use crate::datasource::file_format::csv::CsvFormatFactory; @@ -24,7 +25,7 @@ use crate::datasource::file_format::json::JsonFormatFactory; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormatFactory; use crate::datasource::file_format::FileFormatFactory; -use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory}; +use crate::datasource::provider::DefaultTableFactory; use crate::execution::context::SessionState; #[cfg(feature = "nested_expressions")] use crate::functions_nested; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 59a1106462766..cf5a184e34160 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -500,7 +500,7 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION"); extern crate core; extern crate sqlparser; -pub mod catalog; +pub mod catalog_common; pub mod dataframe; pub mod datasource; pub mod error; @@ -535,6 +535,11 @@ pub use common::config; // NB datafusion execution is re-exported in the `execution` module +/// re-export of [`datafusion_catalog`] crate +pub mod catalog { + pub use datafusion_catalog::*; +} + /// re-export of [`datafusion_expr`] crate pub mod logical_expr { pub use datafusion_expr::*; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index ba0509f3f51ac..042febf32fd19 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -29,12 +29,12 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use crate::catalog::{TableProvider, TableProviderFactory}; use crate::dataframe::DataFrame; -use crate::datasource::provider::TableProviderFactory; use crate::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; -use crate::datasource::{empty::EmptyTable, provider_as_source, TableProvider}; +use crate::datasource::{empty::EmptyTable, provider_as_source}; use crate::error::Result; -use crate::execution::context::{SessionState, TaskContext}; +use crate::execution::context::TaskContext; use crate::logical_expr::{LogicalPlanBuilder, 
UNNAMED_TABLE}; use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, @@ -49,9 +49,9 @@ use datafusion_expr::{CreateExternalTable, Expr, TableType}; use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; +use datafusion_catalog::Session; use futures::Stream; use tempfile::TempDir; - // backwards compatibility #[cfg(feature = "parquet")] pub use datafusion_common::test_util::parquet_test_data; @@ -177,7 +177,7 @@ pub struct TestTableFactory {} impl TableProviderFactory for TestTableFactory { async fn create( &self, - _: &SessionState, + _: &dyn Session, cmd: &CreateExternalTable, ) -> Result> { Ok(Arc::new(TestTableProvider { @@ -213,7 +213,7 @@ impl TableProvider for TestTableProvider { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, _projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index eebc946ccb68c..7c051ffaa7e10 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -26,7 +26,7 @@ use arrow::datatypes::{DataType, Field, Int32Type, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::Result; -use datafusion::execution::context::{SessionContext, SessionState, TaskContext}; +use datafusion::execution::context::{SessionContext, TaskContext}; use datafusion::logical_expr::{ col, Expr, LogicalPlan, LogicalPlanBuilder, TableScan, UNNAMED_TABLE, }; @@ -43,6 +43,7 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::{ExecutionMode, PlanProperties}; use async_trait::async_trait; +use datafusion_catalog::Session; use futures::stream::Stream; mod provider_filter_pushdown; @@ -212,7 +213,7 @@ impl TableProvider for CustomTableProvider { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index b5506b7c12f61..e91bb023ef38f 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -21,9 +21,10 @@ use std::sync::Arc; use arrow::array::{Int32Builder, Int64Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion::datasource::provider::{TableProvider, TableType}; +use datafusion::catalog::TableProvider; +use datafusion::datasource::provider::TableType; use datafusion::error::Result; -use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::execution::context::TaskContext; use datafusion::logical_expr::TableProviderFilterPushDown; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ @@ -39,6 +40,7 @@ use datafusion_functions_aggregate::expr_fn::count; use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; +use datafusion_catalog::Session; fn create_batch(value: i32, num_rows: usize) -> Result { let mut builder = Int32Builder::with_capacity(num_rows); @@ -162,7 +164,7 @@ impl TableProvider for CustomProvider { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: 
Option<&Vec>, filters: &[Expr], _: Option, diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 2d42b03bfed87..41d182a3767b3 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -20,7 +20,7 @@ use std::{any::Any, sync::Arc}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::execution::context::TaskContext; use datafusion::{ datasource::{TableProvider, TableType}, error::Result, @@ -36,6 +36,7 @@ use datafusion_common::{project_schema, stats::Precision}; use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; +use datafusion_catalog::Session; /// This is a testing structure for statistics /// It will act both as a table provider and execution plan @@ -89,7 +90,7 @@ impl TableProvider for StatisticsValidation { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], // limit is ignored because it is not mandatory for a `TableProvider` to honor it diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index bc2c3315da592..f62a019eb9607 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -35,7 +35,6 @@ use tokio::fs::File; use datafusion::datasource::streaming::StreamingTable; use datafusion::datasource::{MemTable, TableProvider}; -use datafusion::execution::context::SessionState; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion::execution::session_state::SessionStateBuilder; @@ -45,6 +44,7 @@ use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use datafusion_common::{assert_contains, Result}; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_catalog::Session; use datafusion_execution::TaskContext; use test_utils::AccessLogGenerator; @@ -792,7 +792,7 @@ impl TableProvider for SortedTableProvider { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, diff --git a/datafusion/core/tests/user_defined/user_defined_table_functions.rs b/datafusion/core/tests/user_defined/user_defined_table_functions.rs index 1e8d30cab6385..5fd3b7a033849 100644 --- a/datafusion/core/tests/user_defined/user_defined_table_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_table_functions.rs @@ -24,11 +24,11 @@ use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::function::TableFunctionImpl; use datafusion::datasource::TableProvider; use datafusion::error::Result; -use datafusion::execution::context::SessionState; use datafusion::execution::TaskContext; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::{collect, ExecutionPlan}; use datafusion::prelude::SessionContext; +use datafusion_catalog::Session; use datafusion_common::{assert_batches_eq, DFSchema, ScalarValue}; use datafusion_expr::{EmptyRelation, Expr, LogicalPlan, Projection, TableType}; use std::fs::File; @@ -127,7 +127,7 @@ impl TableProvider for SimpleCsvTable { async fn scan( &self, - state: &SessionState, + state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], _limit: Option, @@ -161,7 +161,7 @@ impl TableProvider for 
SimpleCsvTable { } impl SimpleCsvTable { - async fn interpreter_expr(&self, state: &SessionState) -> Result { + async fn interpreter_expr(&self, state: &dyn Session) -> Result { use datafusion::logical_expr::expr_rewriter::normalize_col; use datafusion::logical_expr::utils::columnize_expr; let plan = LogicalPlan::EmptyRelation(EmptyRelation { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 9c81c48527833..1bd6e9ad34b49 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -30,12 +30,11 @@ use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; use std::vec; +use datafusion::catalog::{TableProvider, TableProviderFactory}; use datafusion::datasource::file_format::arrow::ArrowFormatFactory; use datafusion::datasource::file_format::csv::CsvFormatFactory; use datafusion::datasource::file_format::parquet::ParquetFormatFactory; use datafusion::datasource::file_format::{format_as_file_type, DefaultFileType}; -use datafusion::datasource::provider::TableProviderFactory; -use datafusion::datasource::TableProvider; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::count::count_udaf; diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index dd27727e3ad5d..224a0e18eac4d 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -27,12 +27,12 @@ use arrow::array::{ }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; -use datafusion::execution::context::SessionState; use datafusion::logical_expr::{create_udf, ColumnarValue, Expr, ScalarUDF, Volatility}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionConfig; use datafusion::{ - catalog::{schema::MemorySchemaProvider, CatalogProvider, MemoryCatalogProvider}, + catalog::CatalogProvider, + catalog_common::{memory::MemoryCatalogProvider, memory::MemorySchemaProvider}, datasource::{MemTable, TableProvider, TableType}, prelude::{CsvReadOptions, SessionContext}, }; @@ -40,6 +40,7 @@ use datafusion_common::cast::as_float64_array; use datafusion_common::DataFusionError; use async_trait::async_trait; +use datafusion::catalog::Session; use log::info; use tempfile::TempDir; @@ -221,7 +222,7 @@ pub async fn register_temp_table(ctx: &SessionContext) { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, _: Option<&Vec>, _: &[Expr], _: Option, diff --git a/docs/source/library-user-guide/custom-table-providers.md b/docs/source/library-user-guide/custom-table-providers.md index f53ac6cfae976..a250e880913c5 100644 --- a/docs/source/library-user-guide/custom-table-providers.md +++ b/docs/source/library-user-guide/custom-table-providers.md @@ -112,7 +112,7 @@ impl CustomDataSource { impl TableProvider for CustomDataSource { async fn scan( &self, - _state: &SessionState, + _state: &dyn Session, projection: Option<&Vec>, // filters and limit can be used here to inject some push-down operations if needed _filters: &[Expr], From d6e016e691d7ea540a71449b8e1fc5fe9d76fa0c Mon Sep 17 00:00:00 2001 From: jcsherin Date: Fri, 26 Jul 2024 20:28:59 +0530 Subject: [PATCH 154/357] doc: why nullable of list item is set to true (#11626) * doc: why nullable of list item is set to true * Adds an external doc to avoid repeating text * 
rewrite * redirects to external doc * Adds ASF license * Minor: formatting fixes * Minor: copy edits * Retrigger CI * Fixes: name of aggregation in example In `array_agg` the list is nullable, so changed the example to `nth_value` where the list is not nullable to be correct. * Disambiguates list item nullability in copy --------- Co-authored-by: Andrew Lamb --- datafusion/functions-aggregate/COMMENTS.md | 77 +++++++++++++++++++ .../functions-aggregate/src/array_agg.rs | 2 + .../functions-aggregate/src/bit_and_or_xor.rs | 1 + datafusion/functions-aggregate/src/count.rs | 1 + .../functions-aggregate/src/nth_value.rs | 5 +- datafusion/functions-aggregate/src/sum.rs | 1 + 6 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 datafusion/functions-aggregate/COMMENTS.md diff --git a/datafusion/functions-aggregate/COMMENTS.md b/datafusion/functions-aggregate/COMMENTS.md new file mode 100644 index 0000000000000..23a996faf0075 --- /dev/null +++ b/datafusion/functions-aggregate/COMMENTS.md @@ -0,0 +1,77 @@ + + +# Why Is List Item Always Nullable? + +## Motivation + +There were independent proposals to make the `nullable` setting of list +items in accumulator state be configurable. This meant adding additional +fields which captured the `nullable` setting from schema in planning for +the first argument to the aggregation function, and the returned value. + +These fields were to be added to `StateFieldArgs`. But then we found out +that aggregate computation does not depend on it, and it can be avoided. + +This document exists to make that reasoning explicit. + +## Background + +The list data type is used in the accumulator state for a few aggregate +functions like: + +- `sum` +- `count` +- `array_agg` +- `bit_and`, `bit_or` and `bit_xor` +- `nth_value` + +In all of the above cases the data type of the list item is equivalent +to either the first argument of the aggregate function or the returned +value. + +For example, in `array_agg` the data type of item is equivalent to the +first argument and the definition looks like this: + +```rust +// `args` : `StateFieldArgs` +// `input_type` : data type of the first argument +let mut fields = vec![Field::new_list( + format_state_name(self.name(), "nth_value"), + Field::new("item", args.input_type.clone(), true /* nullable of list item */ ), + false, // nullable of list itself +)]; +``` + +For all the aggregates listed above, the list item is always defined as +nullable. + +## Computing Intermediate State + +By setting `nullable` (of list item) to be always `true` like this we +ensure that the aggregate computation works even when nulls are +present. The advantage of doing it this way is that it eliminates the +need for additional code and special treatment of nulls in the +accumulator state. + +## Nullable Of List Itself + +The `nullable` of list itself depends on the aggregate. In the case of +`array_agg` the list is nullable(`true`), meanwhile for `sum` the list +is not nullable(`false`). 
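To make this concrete, here is a minimal sketch (not taken from the patch above) using plain arrow-rs builders; it assumes only the `arrow` crate. Because the item field created by `ListBuilder` is nullable by default, a null input value can be appended straight into the list-typed intermediate state with no special handling:

```rust
use arrow::array::{Array, Int64Builder, ListBuilder};

fn main() {
    // Build a list-typed intermediate state; the "item" field produced by
    // ListBuilder is nullable, mirroring the accumulator state fields above.
    let mut state = ListBuilder::new(Int64Builder::new());

    // Null input values go into the state directly.
    state.values().append_value(1);
    state.values().append_null();
    state.values().append_value(3);
    state.append(true); // the list itself is valid for this group

    let list = state.finish();
    // The state's type is List(Field { name: "item", data_type: Int64, nullable: true, .. })
    println!("{:?}", list.data_type());
    // The first (and only) list contains [1, null, 3], with the null preserved.
    println!("{:?}", list.value(0));
}
```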
diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 96b39ae4121eb..c25d592428bbd 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -89,6 +89,7 @@ impl AggregateUDFImpl for ArrayAgg { if args.is_distinct { return Ok(vec![Field::new_list( format_state_name(args.name, "distinct_array_agg"), + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.input_type.clone(), true), true, )]); @@ -96,6 +97,7 @@ impl AggregateUDFImpl for ArrayAgg { let mut fields = vec![Field::new_list( format_state_name(args.name, "array_agg"), + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.input_type.clone(), true), true, )]; diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs index 6c2d6cb5285c1..f6dd0bc20a831 100644 --- a/datafusion/functions-aggregate/src/bit_and_or_xor.rs +++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs @@ -203,6 +203,7 @@ impl AggregateUDFImpl for BitwiseOperation { args.name, format!("{} distinct", self.name()).as_str(), ), + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.return_type.clone(), true), false, )]) diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 0ead22e90a163..56850d0e02a16 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -125,6 +125,7 @@ impl AggregateUDFImpl for Count { if args.is_distinct { Ok(vec![Field::new_list( format_state_name(args.name, "count distinct"), + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.input_type.clone(), true), false, )]) diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 9bbd68c9bdf60..0c1f1f2928223 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -134,10 +134,7 @@ impl AggregateUDFImpl for NthValueAgg { fn state_fields(&self, args: StateFieldsArgs) -> Result> { let mut fields = vec![Field::new_list( format_state_name(self.name(), "nth_value"), - // TODO: The nullability of the list element should be configurable. - // The hard-coded `true` should be changed once the field for - // nullability is added to `StateFieldArgs` struct. 
- // See: https://github.com/apache/datafusion/pull/11063 + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.input_type.clone(), true), false, )]; diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index a9f31dc05be96..08e3908a58297 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -174,6 +174,7 @@ impl AggregateUDFImpl for Sum { if args.is_distinct { Ok(vec![Field::new_list( format_state_name(args.name, "sum distinct"), + // See COMMENTS.md to understand why nullable is set to true Field::new("item", args.return_type.clone(), true), false, )]) From 64af410d0bcdd04210eb65df424cd1287f79b088 Mon Sep 17 00:00:00 2001 From: Wendell Smith Date: Fri, 26 Jul 2024 10:59:15 -0400 Subject: [PATCH 155/357] Add support for USING to SQL unparser (#11636) * Add support for USING to SQL unparser * cargo fmt * Downgrade USING to ON when necessary when unparsing When the conditions and filters in the LogicalPlan are not in a form compatible with USING, we can instead use ON - so we do. --- datafusion/sql/src/unparser/plan.rs | 150 +++++++++++++++------- datafusion/sql/tests/cases/plan_to_sql.rs | 2 + 2 files changed, 107 insertions(+), 45 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 9519b520ce5ee..b30e109881c2e 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, Result}; +use datafusion_common::{ + internal_err, not_impl_err, plan_err, Column, DataFusionError, Result, +}; use datafusion_expr::{ expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, Projection, }; @@ -368,37 +370,11 @@ impl Unparser<'_> { self.select_to_sql_recursively(input, query, select, relation) } LogicalPlan::Join(join) => { - match join.join_constraint { - JoinConstraint::On => {} - JoinConstraint::Using => { - return not_impl_err!( - "Unsupported join constraint: {:?}", - join.join_constraint - ) - } - } - - // parse filter if exists - let join_filter = match &join.filter { - Some(filter) => Some(self.expr_to_sql(filter)?), - None => None, - }; - - // map join.on to `l.a = r.a AND l.b = r.b AND ...` - let eq_op = ast::BinaryOperator::Eq; - let join_on = self.join_conditions_to_sql(&join.on, eq_op)?; - - // Merge `join_on` and `join_filter` - let join_expr = match (join_filter, join_on) { - (Some(filter), Some(on)) => Some(self.and_op_to_sql(filter, on)), - (Some(filter), None) => Some(filter), - (None, Some(on)) => Some(on), - (None, None) => None, - }; - let join_constraint = match join_expr { - Some(expr) => ast::JoinConstraint::On(expr), - None => ast::JoinConstraint::None, - }; + let join_constraint = self.join_constraint_to_sql( + join.join_constraint, + &join.on, + join.filter.as_ref(), + )?; let mut right_relation = RelationBuilder::default(); @@ -583,24 +559,108 @@ impl Unparser<'_> { } } - fn join_conditions_to_sql( + /// Convert the components of a USING clause to the USING AST. Returns + /// 'None' if the conditions are not compatible with a USING expression, + /// e.g. non-column expressions or non-matching names. 
+ fn join_using_to_sql( &self, - join_conditions: &Vec<(Expr, Expr)>, - eq_op: ast::BinaryOperator, - ) -> Result> { - // Only support AND conjunction for each binary expression in join conditions - let mut exprs: Vec = vec![]; + join_conditions: &[(Expr, Expr)], + ) -> Option { + let mut idents = Vec::with_capacity(join_conditions.len()); for (left, right) in join_conditions { - // Parse left + match (left, right) { + ( + Expr::Column(Column { + relation: _, + name: left_name, + }), + Expr::Column(Column { + relation: _, + name: right_name, + }), + ) if left_name == right_name => { + idents.push(self.new_ident_quoted_if_needs(left_name.to_string())); + } + // USING is only valid with matching column names; arbitrary expressions + // are not allowed + _ => return None, + } + } + Some(ast::JoinConstraint::Using(idents)) + } + + /// Convert a join constraint and associated conditions and filter to a SQL AST node + fn join_constraint_to_sql( + &self, + constraint: JoinConstraint, + conditions: &[(Expr, Expr)], + filter: Option<&Expr>, + ) -> Result { + match (constraint, conditions, filter) { + // No constraints + (JoinConstraint::On | JoinConstraint::Using, [], None) => { + Ok(ast::JoinConstraint::None) + } + + (JoinConstraint::Using, conditions, None) => { + match self.join_using_to_sql(conditions) { + Some(using) => Ok(using), + // As above, this should not be reachable from parsed SQL, + // but a user could create this; we "downgrade" to ON. + None => self.join_conditions_to_sql_on(conditions, None), + } + } + + // Two cases here: + // 1. Straightforward ON case, with possible equi-join conditions + // and additional filters + // 2. USING with additional filters; we "downgrade" to ON, because + // you can't use USING with arbitrary filters. (This should not + // be accessible from parsed SQL, but may have been a + // custom-built JOIN by a user.) 
+ (JoinConstraint::On | JoinConstraint::Using, conditions, filter) => { + self.join_conditions_to_sql_on(conditions, filter) + } + } + } + + // Convert a list of equi0join conditions and an optional filter to a SQL ON + // AST node, with the equi-join conditions and the filter merged into a + // single conditional expression + fn join_conditions_to_sql_on( + &self, + join_conditions: &[(Expr, Expr)], + filter: Option<&Expr>, + ) -> Result { + let mut condition = None; + // AND the join conditions together to create the overall condition + for (left, right) in join_conditions { + // Parse left and right let l = self.expr_to_sql(left)?; - // Parse right let r = self.expr_to_sql(right)?; - // AND with existing expression - exprs.push(self.binary_op_to_sql(l, r, eq_op.clone())); + let e = self.binary_op_to_sql(l, r, ast::BinaryOperator::Eq); + condition = match condition { + Some(expr) => Some(self.and_op_to_sql(expr, e)), + None => Some(e), + }; } - let join_expr: Option = - exprs.into_iter().reduce(|r, l| self.and_op_to_sql(r, l)); - Ok(join_expr) + + // Then AND the non-equijoin filter condition as well + condition = match (condition, filter) { + (Some(expr), Some(filter)) => { + Some(self.and_op_to_sql(expr, self.expr_to_sql(filter)?)) + } + (Some(expr), None) => Some(expr), + (None, Some(filter)) => Some(self.expr_to_sql(filter)?), + (None, None) => None, + }; + + let constraint = match condition { + Some(filter) => ast::JoinConstraint::On(filter), + None => ast::JoinConstraint::None, + }; + + Ok(constraint) } fn and_op_to_sql(&self, lhs: ast::Expr, rhs: ast::Expr) -> ast::Expr { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index aada560fd884a..a52333e54fac6 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -84,6 +84,7 @@ fn roundtrip_statement() -> Result<()> { "select 1;", "select 1 limit 0;", "select ta.j1_id from j1 ta join (select 1 as j1_id) tb on ta.j1_id = tb.j1_id;", + "select ta.j1_id from j1 ta join (select 1 as j1_id) tb using (j1_id);", "select ta.j1_id from j1 ta join (select 1 as j1_id) tb on ta.j1_id = tb.j1_id where ta.j1_id > 1;", "select ta.j1_id from (select 1 as j1_id) ta;", "select ta.j1_id from j1 ta;", @@ -142,6 +143,7 @@ fn roundtrip_statement() -> Result<()> { r#"SELECT id, count(distinct id) over (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), sum(id) OVER (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) from person"#, "SELECT id, sum(id) OVER (PARTITION BY first_name ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) from person", + "WITH t1 AS (SELECT j1_id AS id, j1_string name FROM j1), t2 AS (SELECT j2_id AS id, j2_string name FROM j2) SELECT * FROM t1 JOIN t2 USING (id, name)", ]; // For each test sql string, we transform as follows: From cdf387e4de562d5c8bde7af30dcc153acfa0dfce Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Fri, 26 Jul 2024 07:59:34 -0700 Subject: [PATCH 156/357] fix: dont try to coerce list for regex match (#11646) * fix: dont try to coerce list for regex * feat: add test for regex planning error * fix: add error to prove during planning --- datafusion/expr/src/type_coercion/binary.rs | 1 - datafusion/functions/src/regex/regexpmatch.rs | 1 + datafusion/sqllogictest/test_files/regexp.slt | 5 +++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index e1765b5c3e6ad..ae5bdc88b115f 100644 --- 
a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -1018,7 +1018,6 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { string_coercion(lhs_type, rhs_type) - .or_else(|| list_coercion(lhs_type, rhs_type)) .or_else(|| dictionary_coercion(lhs_type, rhs_type, false)) } diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 73228e6081434..f57d3c17bd72b 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -36,6 +36,7 @@ use std::sync::Arc; pub struct RegexpMatchFunc { signature: Signature, } + impl Default for RegexpMatchFunc { fn default() -> Self { Self::new() diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index f5349fc659f6a..aa99a54c26eea 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -325,6 +325,11 @@ SELECT 'foo\nbar\nbaz' ~ 'bar'; ---- true +statement error +Error during planning: Cannot infer common argument type for regex operation List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata +: {} }) ~ List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +select [1,2] ~ [3]; + query B SELECT 'foo\nbar\nbaz' LIKE '%bar%'; ---- From 01dc3f9766e1e36ef9864effdb1a7c89d138a982 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 26 Jul 2024 11:00:17 -0400 Subject: [PATCH 157/357] Run CI with latest (Rust 1.80), add ticket references to commented out tests (#11661) * Revert "Temporarily pin toolchain version to avoid clippy (#11655)" This reverts commit 5b7e0aae22c2d5a5be19e4b877e878199b9f6c43. 
* Add ticket references in comments --- .../physical-plan/src/joins/hash_join.rs | 3 +++ .../sqllogictest/test_files/parquet.slt | 1 + .../test_files/sort_merge_join.slt | 1 + rust-toolchain.toml | 20 ------------------- 4 files changed, 5 insertions(+), 20 deletions(-) delete mode 100644 rust-toolchain.toml diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 4ed4d9ba2534f..063f35059fb8f 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -1932,6 +1932,7 @@ mod tests { } // FIXME(#TODO) test fails with feature `force_hash_collisions` + // https://github.com/apache/datafusion/issues/11658 #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] @@ -1989,6 +1990,7 @@ mod tests { /// Test where the left has 2 parts, the right with 1 part => 1 part // FIXME(#TODO) test fails with feature `force_hash_collisions` + // https://github.com/apache/datafusion/issues/11658 #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] @@ -2103,6 +2105,7 @@ mod tests { /// Test where the left has 1 part, the right has 2 parts => 2 parts // FIXME(#TODO) test fails with feature `force_hash_collisions` + // https://github.com/apache/datafusion/issues/11658 #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 553cdeee908cc..3342f85c81417 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -252,6 +252,7 @@ SELECT COUNT(*) FROM timestamp_with_tz; 131072 # FIXME(#TODO) fails with feature `force_hash_collisions` +# https://github.com/apache/datafusion/issues/11660 # Perform the query: # query IPT # SELECT diff --git a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt index bebec31b90c09..6e7b50973cde2 100644 --- a/datafusion/sqllogictest/test_files/sort_merge_join.slt +++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt @@ -239,6 +239,7 @@ SELECT * FROM t1 FULL JOIN t2 ON t1_id = t2_id NULL NULL NULL 55 w 3 # FIXME(#TODO) fails with feature `force_hash_collisions` +# https://github.com/apache/datafusion/issues/11659 # equijoin_full_and_condition_from_both # query ITIITI rowsort # SELECT * FROM t1 FULL JOIN t2 ON t1_id = t2_id AND t2_int <= t1_int diff --git a/rust-toolchain.toml b/rust-toolchain.toml deleted file mode 100644 index 0ef859ae8db3b..0000000000000 --- a/rust-toolchain.toml +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[toolchain] -# Temporarily pin toolchain version until we solve problems reported by newer clippy release. -channel = "1.79.0" \ No newline at end of file From 204e1bcbaa5fcd3cf3cbe045f9832ee2f669e92f Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 27 Jul 2024 05:55:17 +0530 Subject: [PATCH 158/357] Use `AccumulatorArgs::is_reversed` in `NthValueAgg` (#11669) * Refactor: use `AccumulatorArgs::is_reversed` * Minor: fixes comment --- .../functions-aggregate/src/nth_value.rs | 18 +++--------------- .../physical-expr-common/src/aggregate/mod.rs | 2 +- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 0c1f1f2928223..74f77f3f4b86a 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -30,8 +30,7 @@ use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValu use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDF, AggregateUDFImpl, Expr, ReversedUDAF, Signature, - Volatility, + Accumulator, AggregateUDFImpl, Expr, ReversedUDAF, Signature, Volatility, }; use datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; use datafusion_physical_expr_common::aggregate::utils::ordering_fields; @@ -53,9 +52,6 @@ make_udaf_expr_and_func!( #[derive(Debug)] pub struct NthValueAgg { signature: Signature, - /// Determines whether `N` is relative to the beginning or the end - /// of the aggregation. When set to `true`, then `N` is from the end. - reversed: bool, } impl NthValueAgg { @@ -63,14 +59,8 @@ impl NthValueAgg { pub fn new() -> Self { Self { signature: Signature::any(2, Volatility::Immutable), - reversed: false, } } - - pub fn with_reversed(mut self, reversed: bool) -> Self { - self.reversed = reversed; - self - } } impl Default for NthValueAgg { @@ -99,7 +89,7 @@ impl AggregateUDFImpl for NthValueAgg { fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { let n = match acc_args.input_exprs[1] { Expr::Literal(ScalarValue::Int64(Some(value))) => { - if self.reversed { + if acc_args.is_reversed { Ok(-value) } else { Ok(value) @@ -154,9 +144,7 @@ impl AggregateUDFImpl for NthValueAgg { } fn reverse_expr(&self) -> ReversedUDAF { - ReversedUDAF::Reversed(Arc::from(AggregateUDF::from( - Self::new().with_reversed(!self.reversed), - ))) + ReversedUDAF::Reversed(nth_value_udaf()) } } diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index b58a5a6faf242..4eede6567504a 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -485,7 +485,7 @@ impl AggregateFunctionExpr { self.ignore_nulls } - /// Return if the aggregation is distinct + /// Return if the aggregation is reversed pub fn is_reversed(&self) -> bool { self.is_reversed } From 42732b5f40fbeb22cea43f104c720f66c3eba2ad Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Sat, 27 Jul 2024 07:18:00 -0400 Subject: [PATCH 159/357] Docs: adding explicit mention of test_utils to docs (#11670) * Docs: adding explicit mention of test_utils to docs * Improved wording --- docs/source/contributor-guide/testing.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/contributor-guide/testing.md b/docs/source/contributor-guide/testing.md index 0f4461ab2c2c6..90e39c0057c12 100644 --- 
a/docs/source/contributor-guide/testing.md +++ b/docs/source/contributor-guide/testing.md @@ -30,6 +30,7 @@ and tries to follow the Rust standard [Testing Organization](https://doc.rust-la ## Unit tests Tests for code in an individual module are defined in the same source file with a `test` module, following Rust convention. +The [test_util](https://github.com/apache/datafusion/tree/main/datafusion/common/src/test_util.rs) module provides useful macros to write unit tests effectively, such as `assert_batches_sorted_eq` and `assert_batches_eq` for RecordBatches and `assert_contains` / `assert_not_contains` which are used extensively in the codebase. ## sqllogictests Tests From a598739ad7ab9d135272d9633c562589b4ef7083 Mon Sep 17 00:00:00 2001 From: wiedld Date: Sat, 27 Jul 2024 04:18:58 -0700 Subject: [PATCH 160/357] Ensure statistic defaults in parquet writers are in sync (#11656) * test(11367): update tests to indicate that the diffferent compression setting is expected * chore(11367): update default settings for statistics_enabled * test(11367): fix test cases, as we set the same variable for from_datafusion_defaults twice (in both the original DF options and in the builder too); only should set once * test(11367): fix bug in the test case, should have set the arrow-rs to true (default is false) * test(11367): fix test for fpp and ndv as defaults, when bloom filter turned on * test(11367): update readme and sqllogictests for updated default config --- datafusion/common/src/config.rs | 2 +- .../common/src/file_options/parquet_writer.rs | 118 +++++------------- .../test_files/information_schema.slt | 4 +- docs/source/user-guide/configs.md | 2 +- 4 files changed, 35 insertions(+), 91 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index c3082546b4971..8af71d5abbb36 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -400,7 +400,7 @@ config_namespace! { /// Valid values are: "none", "chunk", and "page" /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub statistics_enabled: Option, default = None + pub statistics_enabled: Option, default = Some("page".into()) /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index e63a7e5ef78d8..34b7379823f87 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -392,7 +392,7 @@ mod tests { ColumnOptions { compression: Some("zstd(22)".into()), dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), - statistics_enabled: Some("page".into()), + statistics_enabled: Some("none".into()), max_statistics_size: Some(72), encoding: Some("RLE".into()), bloom_filter_enabled: Some(true), @@ -614,23 +614,7 @@ mod tests { "should indicate that table_parquet_opts defaults came from datafusion", ); - // Expected: the remaining should match - let same_created_by = default_table_writer_opts.global.created_by.clone(); - let mut from_extern_parquet = - session_config_from_writer_props(&default_writer_props); - from_extern_parquet.global.created_by = same_created_by; - // TODO: the remaining defaults do not match! 
- // refer to https://github.com/apache/datafusion/issues/11367 - assert_ne!( - default_table_writer_opts, - from_extern_parquet, - "the default writer_props should have the same configuration as the session's default TableParquetOptions", - ); - - // Below here itemizes how the defaults **should** match, but do not. - - // TODO: compression defaults do not match - // refer to https://github.com/apache/datafusion/issues/11367 + // Expected: the datafusion default compression is different from arrow-rs's parquet assert_eq!( default_writer_props.compression(&"default".into()), Compression::UNCOMPRESSED, @@ -644,35 +628,13 @@ mod tests { "datafusion's default is zstd" ); - // datafusion's `None` for Option => becomes parquet's EnabledStatistics::Page - // TODO: should this be changed? - // refer to https://github.com/apache/datafusion/issues/11367 - assert_eq!( - default_writer_props.statistics_enabled(&"default".into()), - EnabledStatistics::Page, - "extern parquet's default is page" - ); - assert_eq!( - default_table_writer_opts.global.statistics_enabled, None, - "datafusion's has no default" - ); - assert_eq!( - from_datafusion_defaults.statistics_enabled(&"default".into()), - EnabledStatistics::Page, - "should see the extern parquet's default over-riding datafusion's None", - ); - - // Confirm all other settings are equal. - // First resolve the known discrepancies, (set as the same). - // TODO: once we fix the above mis-matches, we should be able to remove this. + // Expected: the remaining should match + let same_created_by = default_table_writer_opts.global.created_by.clone(); let mut from_extern_parquet = session_config_from_writer_props(&default_writer_props); + from_extern_parquet.global.created_by = same_created_by; from_extern_parquet.global.compression = Some("zstd(3)".into()); - from_extern_parquet.global.statistics_enabled = None; - // Expected: the remaining should match - let same_created_by = default_table_writer_opts.global.created_by.clone(); // we expect these to be different - from_extern_parquet.global.created_by = same_created_by; // we expect these to be different assert_eq!( default_table_writer_opts, from_extern_parquet, @@ -685,31 +647,25 @@ mod tests { // the TableParquetOptions::default, with only the bloom filter turned on let mut default_table_writer_opts = TableParquetOptions::default(); default_table_writer_opts.global.bloom_filter_on_write = true; - - // the WriterProperties::default, with only the bloom filter turned on - let default_writer_props = WriterProperties::new(); let from_datafusion_defaults = WriterPropertiesBuilder::try_from(&default_table_writer_opts) .unwrap() - .set_bloom_filter_enabled(true) .build(); - // TODO: should have same behavior in either. 
- // refer to https://github.com/apache/datafusion/issues/11367 - assert_ne!( + // the WriterProperties::default, with only the bloom filter turned on + let default_writer_props = WriterProperties::builder() + .set_bloom_filter_enabled(true) + .build(); + + assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), from_datafusion_defaults.bloom_filter_properties(&"default".into()), - "parquet and datafusion props, will not have the same bloom filter props", + "parquet and datafusion props, should have the same bloom filter props", ); assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), - None, - "extern parquet's default remains None" - ); - assert_eq!( - from_datafusion_defaults.bloom_filter_properties(&"default".into()), Some(&BloomFilterProperties::default()), - "datafusion's has BloomFilterProperties::default", + "should use the default bloom filter props" ); } @@ -719,35 +675,29 @@ mod tests { let mut default_table_writer_opts = TableParquetOptions::default(); default_table_writer_opts.global.bloom_filter_on_write = true; default_table_writer_opts.global.bloom_filter_fpp = Some(0.42); - - // the WriterProperties::default, with only fpp set - let default_writer_props = WriterProperties::new(); let from_datafusion_defaults = WriterPropertiesBuilder::try_from(&default_table_writer_opts) .unwrap() - .set_bloom_filter_enabled(true) - .set_bloom_filter_fpp(0.42) .build(); - // TODO: should have same behavior in either. - // refer to https://github.com/apache/datafusion/issues/11367 - assert_ne!( + // the WriterProperties::default, with only fpp set + let default_writer_props = WriterProperties::builder() + .set_bloom_filter_enabled(true) + .set_bloom_filter_fpp(0.42) + .build(); + + assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), from_datafusion_defaults.bloom_filter_properties(&"default".into()), - "parquet and datafusion props, will not have the same bloom filter props", + "parquet and datafusion props, should have the same bloom filter props", ); assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), - None, - "extern parquet's default remains None" - ); - assert_eq!( - from_datafusion_defaults.bloom_filter_properties(&"default".into()), Some(&BloomFilterProperties { fpp: 0.42, ndv: DEFAULT_BLOOM_FILTER_NDV }), - "datafusion's has BloomFilterProperties", + "should have only the fpp set, and the ndv at default", ); } @@ -757,35 +707,29 @@ mod tests { let mut default_table_writer_opts = TableParquetOptions::default(); default_table_writer_opts.global.bloom_filter_on_write = true; default_table_writer_opts.global.bloom_filter_ndv = Some(42); - - // the WriterProperties::default, with only ndv set - let default_writer_props = WriterProperties::new(); let from_datafusion_defaults = WriterPropertiesBuilder::try_from(&default_table_writer_opts) .unwrap() - .set_bloom_filter_enabled(true) - .set_bloom_filter_ndv(42) .build(); - // TODO: should have same behavior in either. 
- // refer to https://github.com/apache/datafusion/issues/11367 - assert_ne!( + // the WriterProperties::default, with only ndv set + let default_writer_props = WriterProperties::builder() + .set_bloom_filter_enabled(true) + .set_bloom_filter_ndv(42) + .build(); + + assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), from_datafusion_defaults.bloom_filter_properties(&"default".into()), - "parquet and datafusion props, will not have the same bloom filter props", + "parquet and datafusion props, should have the same bloom filter props", ); assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), - None, - "extern parquet's default remains None" - ); - assert_eq!( - from_datafusion_defaults.bloom_filter_properties(&"default".into()), Some(&BloomFilterProperties { fpp: DEFAULT_BLOOM_FILTER_FPP, ndv: 42 }), - "datafusion's has BloomFilterProperties", + "should have only the ndv set, and the fpp at default", ); } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 431060a1f6f80..e85159fd137a7 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -202,7 +202,7 @@ datafusion.execution.parquet.pruning true datafusion.execution.parquet.pushdown_filters false datafusion.execution.parquet.reorder_filters false datafusion.execution.parquet.skip_metadata true -datafusion.execution.parquet.statistics_enabled NULL +datafusion.execution.parquet.statistics_enabled page datafusion.execution.parquet.write_batch_size 1024 datafusion.execution.parquet.writer_version 1.0 datafusion.execution.planning_concurrency 13 @@ -288,7 +288,7 @@ datafusion.execution.parquet.pruning true (reading) If true, the parquet reader datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata -datafusion.execution.parquet.statistics_enabled NULL (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting +datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer version valid values are "1.0" and "2.0" datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. 
Defaults to the number of CPU cores on the system diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e992361755d36..5814d88c7dd87 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -62,7 +62,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.compression | zstd(3) | (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting Note that this default setting is not the same as the default parquet writer setting. | | datafusion.execution.parquet.dictionary_enabled | true | (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | -| datafusion.execution.parquet.statistics_enabled | NULL | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. 
| | datafusion.execution.parquet.created_by | datafusion version 40.0.0 | (writing) Sets "created by" property | From 3cfb99d9874736c1240e60b698857aa8d218c3e3 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Sat, 27 Jul 2024 04:24:02 -0700 Subject: [PATCH 161/357] Implement physical plan serialization for json Copy plans (#11645) --- .../core/src/datasource/file_format/json.rs | 3 +- .../proto/datafusion_common.proto | 5 + .../proto-common/src/generated/pbjson.rs | 91 +++++++++++++++++++ .../proto-common/src/generated/prost.rs | 6 ++ datafusion/proto/proto/datafusion.proto | 1 + .../src/generated/datafusion_proto_common.rs | 6 ++ datafusion/proto/src/generated/pbjson.rs | 13 +++ datafusion/proto/src/generated/prost.rs | 4 +- datafusion/proto/src/lib.rs | 2 +- .../proto/src/logical_plan/file_formats.rs | 69 ++++++++++++-- datafusion/proto/src/logical_plan/mod.rs | 25 ++++- .../tests/cases/roundtrip_logical_plan.rs | 84 ++++++++++++++++- 12 files changed, 296 insertions(+), 13 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 9de9c3d7d8712..7c579e890c8c6 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -57,7 +57,8 @@ use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; #[derive(Default)] /// Factory struct used to create [JsonFormat] pub struct JsonFormatFactory { - options: Option, + /// the options carried by format factory + pub options: Option, } impl JsonFormatFactory { diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 8e8fd2352c6ca..85983dddf6ae5 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -51,6 +51,11 @@ message ParquetFormat { message AvroFormat {} +message NdJsonFormat { + JsonOptions options = 1; +} + + message PrimaryKeyConstraint{ repeated uint64 indices = 1; } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 511072f3cb558..4ac6517ed739a 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -4642,6 +4642,97 @@ impl<'de> serde::Deserialize<'de> for Map { deserializer.deserialize_struct("datafusion_common.Map", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for NdJsonFormat { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.options.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion_common.NdJsonFormat", len)?; + if let Some(v) = self.options.as_ref() { + struct_ser.serialize_field("options", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for NdJsonFormat { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "options", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Options, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn 
expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "options" => Ok(GeneratedField::Options), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = NdJsonFormat; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion_common.NdJsonFormat") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut options__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::Options => { + if options__.is_some() { + return Err(serde::de::Error::duplicate_field("options")); + } + options__ = map_.next_value()?; + } + } + } + Ok(NdJsonFormat { + options: options__, + }) + } + } + deserializer.deserialize_struct("datafusion_common.NdJsonFormat", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ParquetFormat { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 62919e218b133..bf198a24c811b 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -49,6 +49,12 @@ pub struct ParquetFormat { pub struct AvroFormat {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct NdJsonFormat { + #[prost(message, optional, tag = "1")] + pub options: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct PrimaryKeyConstraint { #[prost(uint64, repeated, tag = "1")] pub indices: ::prost::alloc::vec::Vec, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e133abd46f43d..4c90297263c45 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -90,6 +90,7 @@ message ListingTableScanNode { datafusion_common.CsvFormat csv = 10; datafusion_common.ParquetFormat parquet = 11; datafusion_common.AvroFormat avro = 12; + datafusion_common.NdJsonFormat json = 15; } repeated LogicalExprNodeCollection file_sort_order = 13; } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 62919e218b133..bf198a24c811b 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -49,6 +49,12 @@ pub struct ParquetFormat { pub struct AvroFormat {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct NdJsonFormat { + #[prost(message, optional, tag = "1")] + pub options: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct PrimaryKeyConstraint { #[prost(uint64, repeated, tag = "1")] pub indices: ::prost::alloc::vec::Vec, diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index c5ec67d72875f..163a4c044aeb5 100644 --- 
a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -9031,6 +9031,9 @@ impl serde::Serialize for ListingTableScanNode { listing_table_scan_node::FileFormatType::Avro(v) => { struct_ser.serialize_field("avro", v)?; } + listing_table_scan_node::FileFormatType::Json(v) => { + struct_ser.serialize_field("json", v)?; + } } } struct_ser.end() @@ -9062,6 +9065,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { "csv", "parquet", "avro", + "json", ]; #[allow(clippy::enum_variant_names)] @@ -9079,6 +9083,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { Csv, Parquet, Avro, + Json, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -9113,6 +9118,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { "csv" => Ok(GeneratedField::Csv), "parquet" => Ok(GeneratedField::Parquet), "avro" => Ok(GeneratedField::Avro), + "json" => Ok(GeneratedField::Json), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -9226,6 +9232,13 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { return Err(serde::de::Error::duplicate_field("avro")); } file_format_type__ = map_.next_value::<::std::option::Option<_>>()?.map(listing_table_scan_node::FileFormatType::Avro) +; + } + GeneratedField::Json => { + if file_format_type__.is_some() { + return Err(serde::de::Error::duplicate_field("json")); + } + file_format_type__ = map_.next_value::<::std::option::Option<_>>()?.map(listing_table_scan_node::FileFormatType::Json) ; } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 98b70dc253511..606fe3c1699fc 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -118,7 +118,7 @@ pub struct ListingTableScanNode { pub target_partitions: u32, #[prost(message, repeated, tag = "13")] pub file_sort_order: ::prost::alloc::vec::Vec, - #[prost(oneof = "listing_table_scan_node::FileFormatType", tags = "10, 11, 12")] + #[prost(oneof = "listing_table_scan_node::FileFormatType", tags = "10, 11, 12, 15")] pub file_format_type: ::core::option::Option< listing_table_scan_node::FileFormatType, >, @@ -134,6 +134,8 @@ pub mod listing_table_scan_node { Parquet(super::super::datafusion_common::ParquetFormat), #[prost(message, tag = "12")] Avro(super::super::datafusion_common::AvroFormat), + #[prost(message, tag = "15")] + Json(super::super::datafusion_common::NdJsonFormat), } } #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs index bac31850c875b..e7019553f53d1 100644 --- a/datafusion/proto/src/lib.rs +++ b/datafusion/proto/src/lib.rs @@ -124,7 +124,7 @@ pub mod protobuf { pub use datafusion_proto_common::common::proto_error; pub use datafusion_proto_common::protobuf_common::{ ArrowOptions, ArrowType, AvroFormat, AvroOptions, CsvFormat, DfSchema, - EmptyMessage, Field, JoinSide, ParquetFormat, ScalarValue, Schema, + EmptyMessage, Field, JoinSide, NdJsonFormat, ParquetFormat, ScalarValue, Schema, }; pub use datafusion_proto_common::{FromProtoError, ToProtoError}; } diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index 2c4085b888692..ce9d24d94d990 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use datafusion::{ - config::CsvOptions, + config::{CsvOptions, 
JsonOptions}, datasource::file_format::{ arrow::ArrowFormatFactory, csv::CsvFormatFactory, json::JsonFormatFactory, parquet::ParquetFormatFactory, FileFormatFactory, @@ -31,7 +31,7 @@ use datafusion_common::{ }; use prost::Message; -use crate::protobuf::CsvOptions as CsvOptionsProto; +use crate::protobuf::{CsvOptions as CsvOptionsProto, JsonOptions as JsonOptionsProto}; use super::LogicalExtensionCodec; @@ -222,6 +222,34 @@ impl LogicalExtensionCodec for CsvLogicalExtensionCodec { } } +impl JsonOptionsProto { + fn from_factory(factory: &JsonFormatFactory) -> Self { + if let Some(options) = &factory.options { + JsonOptionsProto { + compression: options.compression as i32, + schema_infer_max_rec: options.schema_infer_max_rec as u64, + } + } else { + JsonOptionsProto::default() + } + } +} + +impl From<&JsonOptionsProto> for JsonOptions { + fn from(proto: &JsonOptionsProto) -> Self { + JsonOptions { + compression: match proto.compression { + 0 => CompressionTypeVariant::GZIP, + 1 => CompressionTypeVariant::BZIP2, + 2 => CompressionTypeVariant::XZ, + 3 => CompressionTypeVariant::ZSTD, + _ => CompressionTypeVariant::UNCOMPRESSED, + }, + schema_infer_max_rec: proto.schema_infer_max_rec as usize, + } + } +} + #[derive(Debug)] pub struct JsonLogicalExtensionCodec; @@ -267,17 +295,44 @@ impl LogicalExtensionCodec for JsonLogicalExtensionCodec { fn try_decode_file_format( &self, - __buf: &[u8], - __ctx: &SessionContext, + buf: &[u8], + _ctx: &SessionContext, ) -> datafusion_common::Result> { - Ok(Arc::new(JsonFormatFactory::new())) + let proto = JsonOptionsProto::decode(buf).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to decode JsonOptionsProto: {:?}", + e + )) + })?; + let options: JsonOptions = (&proto).into(); + Ok(Arc::new(JsonFormatFactory { + options: Some(options), + })) } fn try_encode_file_format( &self, - __buf: &mut Vec, - __node: Arc, + buf: &mut Vec, + node: Arc, ) -> datafusion_common::Result<()> { + let options = if let Some(json_factory) = + node.as_any().downcast_ref::() + { + json_factory.options.clone().unwrap_or_default() + } else { + return Err(DataFusionError::Execution( + "Unsupported FileFormatFactory type".to_string(), + )); + }; + + let proto = JsonOptionsProto::from_factory(&JsonFormatFactory { + options: Some(options), + }); + + proto.encode(buf).map_err(|e| { + DataFusionError::Execution(format!("Failed to encode JsonOptions: {:?}", e)) + })?; + Ok(()) } } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 5427f34e8e071..0a91babdfb609 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -38,7 +38,10 @@ use datafusion::datasource::file_format::{ }; use datafusion::{ datasource::{ - file_format::{avro::AvroFormat, csv::CsvFormat, FileFormat}, + file_format::{ + avro::AvroFormat, csv::CsvFormat, json::JsonFormat as OtherNdJsonFormat, + FileFormat, + }, listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, view::ViewTable, TableProvider, @@ -395,7 +398,17 @@ impl AsLogicalPlan for LogicalPlanNode { if let Some(options) = options { csv = csv.with_options(options.try_into()?) } - Arc::new(csv)}, + Arc::new(csv) + }, + FileFormatType::Json(protobuf::NdJsonFormat { + options + }) => { + let mut json = OtherNdJsonFormat::default(); + if let Some(options) = options { + json = json.with_options(options.try_into()?) + } + Arc::new(json) + } FileFormatType::Avro(..) 
=> Arc::new(AvroFormat), }; @@ -996,6 +1009,14 @@ impl AsLogicalPlan for LogicalPlanNode { })); } + if let Some(json) = any.downcast_ref::() { + let options = json.options(); + maybe_some_type = + Some(FileFormatType::Json(protobuf::NdJsonFormat { + options: Some(options.try_into()?), + })) + } + if any.is::() { maybe_some_type = Some(FileFormatType::Avro(protobuf::AvroFormat {})) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 1bd6e9ad34b49..daa92475068f4 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -23,6 +23,8 @@ use arrow::datatypes::{ IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode, DECIMAL256_MAX_PRECISION, }; +use datafusion::datasource::file_format::json::JsonFormatFactory; +use datafusion_common::parsers::CompressionTypeVariant; use prost::Message; use std::any::Any; use std::collections::HashMap; @@ -74,7 +76,8 @@ use datafusion_proto::bytes::{ logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, }; use datafusion_proto::logical_plan::file_formats::{ - ArrowLogicalExtensionCodec, CsvLogicalExtensionCodec, ParquetLogicalExtensionCodec, + ArrowLogicalExtensionCodec, CsvLogicalExtensionCodec, JsonLogicalExtensionCodec, + ParquetLogicalExtensionCodec, }; use datafusion_proto::logical_plan::to_proto::serialize_expr; use datafusion_proto::logical_plan::{ @@ -507,6 +510,73 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn roundtrip_logical_plan_copy_to_json() -> Result<()> { + let ctx = SessionContext::new(); + + // Assume create_json_scan creates a logical plan for scanning a JSON file + let input = create_json_scan(&ctx).await?; + + let table_options = + TableOptions::default_from_session_config(ctx.state().config_options()); + let mut json_format = table_options.json; + + // Set specific JSON format options + json_format.compression = CompressionTypeVariant::GZIP; + json_format.schema_infer_max_rec = 1000; + + let file_type = format_as_file_type(Arc::new(JsonFormatFactory::new_with_options( + json_format.clone(), + ))); + + let plan = LogicalPlan::Copy(CopyTo { + input: Arc::new(input), + output_url: "test.json".to_string(), + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + file_type, + options: Default::default(), + }); + + // Assume JsonLogicalExtensionCodec is implemented similarly to CsvLogicalExtensionCodec + let codec = JsonLogicalExtensionCodec {}; + let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; + let logical_round_trip = + logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; + assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + + match logical_round_trip { + LogicalPlan::Copy(copy_to) => { + assert_eq!("test.json", copy_to.output_url); + assert_eq!("json".to_string(), copy_to.file_type.get_ext()); + assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); + + let file_type = copy_to + .file_type + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + + let format_factory = file_type.as_format_factory(); + let json_factory = format_factory + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + let json_config = json_factory.options.as_ref().unwrap(); + assert_eq!(json_format.compression, json_config.compression); + assert_eq!( + json_format.schema_infer_max_rec, + json_config.schema_infer_max_rec + ); + } + _ => panic!(), + } + + Ok(()) +} 
+ async fn create_csv_scan(ctx: &SessionContext) -> Result { ctx.register_csv("t1", "tests/testdata/test.csv", CsvReadOptions::default()) .await?; @@ -515,6 +585,18 @@ async fn create_csv_scan(ctx: &SessionContext) -> Result Result { + ctx.register_json( + "t1", + "../core/tests/data/1.json", + NdJsonReadOptions::default(), + ) + .await?; + + let input = ctx.table("t1").await?.into_optimized_plan()?; + Ok(input) +} + #[tokio::test] async fn roundtrip_logical_plan_distinct_on() -> Result<()> { let ctx = SessionContext::new(); From a721be1b1d863b5b15a7a945c37ec051c449c46f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 27 Jul 2024 12:35:09 -0400 Subject: [PATCH 162/357] Minor: improve documentation on SessionState (#11642) --- .../core/src/execution/session_state.rs | 19 ++++++++++++++++--- datafusion/execution/src/task.rs | 6 +++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 226e8085341e5..ea7a65cc308fb 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -77,10 +77,23 @@ use std::fmt::Debug; use std::sync::Arc; use uuid::Uuid; -/// Execution context for registering data sources and executing queries. -/// See [`SessionContext`] for a higher level API. +/// `SessionState` contains all the necessary state to plan and execute queries, +/// such as configuration, functions, and runtime environment. Please see the +/// documentation on [`SessionContext`] for more information. /// -/// Use the [`SessionStateBuilder`] to build a SessionState object. +/// +/// # Example: `SessionState` from a [`SessionContext`] +/// +/// ``` +/// use datafusion::prelude::*; +/// let ctx = SessionContext::new(); +/// let state = ctx.state(); +/// ``` +/// +/// # Example: `SessionState` via [`SessionStateBuilder`] +/// +/// You can also use [`SessionStateBuilder`] to build a `SessionState` object +/// directly: /// /// ``` /// use datafusion::prelude::*; diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs index df7fd0dbd92c0..21a644284c42e 100644 --- a/datafusion/execution/src/task.rs +++ b/datafusion/execution/src/task.rs @@ -32,9 +32,9 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; /// Task Execution Context /// -/// A [`TaskContext`] contains the state required during a single -/// query's execution. Please see [`SessionContext`] for a user level -/// multi-query API. +/// A [`TaskContext`] contains the state required during a single query's +/// execution. Please see the documentation on [`SessionContext`] for more +/// information. 
/// /// [`SessionContext`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html #[derive(Debug)] From 5ad60678a21dcd6203b2faaf7e30a0f3ea8f28e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alihan=20=C3=87elikcan?= Date: Sun, 28 Jul 2024 22:31:56 +0300 Subject: [PATCH 163/357] Add LimitPushdown optimization rule and CoalesceBatchesExec fetch (#11652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add LimitPushdown skeleton * Transform StreamTableExec into fetching version when skip is 0 * Transform StreamTableExec into fetching version when skip is non-zero * Fix non-zero skip test * Add fetch field to CoalesceBatchesExec * Tag ProjectionExec, CoalescePartitionsExec and SortPreservingMergeExec as supporting limit pushdown * Add `with_fetch` to SortExec * Push limit down through supporting ExecutionPlans * Reorder LimitPushdown optimization to before SanityCheckPlan * Refactor LimitPushdown tests * Refactor LimitPushdown tests * Add more LimitPushdown tests * Add fetch support to CoalesceBatchesExec * Fix tests that were affected * Refactor LimitPushdown push_down_limits * Remove unnecessary parameter from coalesce_batches_exec * Format files * Apply clippy fixes * Make CoalesceBatchesExec display consistent * Fix slt tests according to LimitPushdown rules * Resolve linter errors * Minor changes * Minor changes * Fix GlobalLimitExec sometimes replacing LocalLimitExec * Fix unnecessary LocalLimitExec for ProjectionExec * Rename GlobalOrLocal into LimitExec * Clarify pushdown recursion * Minor changes * Minor * Do not display when fetch is None * .rs removal * Clean-up tpch plans * Clean-up comments * Update datafusion/core/src/physical_optimizer/optimizer.rs * Update datafusion/physical-plan/src/coalesce_batches.rs * Update datafusion/physical-plan/src/coalesce_batches.rs * Update datafusion/physical-plan/src/coalesce_batches.rs * Update datafusion/core/src/physical_optimizer/limit_pushdown.rs * Update datafusion/core/src/physical_optimizer/limit_pushdown.rs * Update datafusion/physical-plan/src/lib.rs * Implement with_fetch() for other source execs * Minor * Merge all Global/Local-LimitExec combinations in LimitPushdown * Fix compile errors after merge * Update datafusion/core/src/physical_optimizer/limit_pushdown.rs Remove redundant lınes ın docstrıng * Avoid code duplication * Incorporate review feedback --------- Co-authored-by: Mustafa Akur Co-authored-by: berkaysynnada Co-authored-by: Mehmet Ozan Kabak --- .../datasource/physical_plan/arrow_file.rs | 13 + .../core/src/datasource/physical_plan/avro.rs | 13 + .../core/src/datasource/physical_plan/csv.rs | 18 + .../core/src/datasource/physical_plan/json.rs | 12 + .../datasource/physical_plan/parquet/mod.rs | 18 + .../src/physical_optimizer/limit_pushdown.rs | 662 ++++++++++++++++++ datafusion/core/src/physical_optimizer/mod.rs | 1 + .../core/src/physical_optimizer/optimizer.rs | 5 + datafusion/core/tests/memory_limit/mod.rs | 23 +- datafusion/optimizer/src/push_down_limit.rs | 37 +- .../physical-plan/src/coalesce_batches.rs | 172 ++++- .../physical-plan/src/coalesce_partitions.rs | 4 + datafusion/physical-plan/src/lib.rs | 16 + datafusion/physical-plan/src/projection.rs | 4 + datafusion/physical-plan/src/sorts/sort.rs | 11 + .../src/sorts/sort_preserving_merge.rs | 4 + datafusion/physical-plan/src/streaming.rs | 13 + .../sqllogictest/test_files/explain.slt | 36 +- .../sqllogictest/test_files/group_by.slt | 18 +- .../join_disable_repartition_joins.slt | 4 +- 
datafusion/sqllogictest/test_files/limit.slt | 6 +- datafusion/sqllogictest/test_files/order.slt | 10 +- .../sqllogictest/test_files/repartition.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 4 +- datafusion/sqllogictest/test_files/topk.slt | 5 +- .../sqllogictest/test_files/tpch/q11.slt.part | 105 ++- datafusion/sqllogictest/test_files/union.slt | 19 +- datafusion/sqllogictest/test_files/window.slt | 238 +++---- 28 files changed, 1186 insertions(+), 287 deletions(-) create mode 100644 datafusion/core/src/physical_optimizer/limit_pushdown.rs diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index e536ae8232320..e720b4efff6f3 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -195,6 +195,19 @@ impl ExecutionPlan for ArrowExec { fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } + + fn with_fetch(&self, limit: Option) -> Option> { + let new_config = self.base_config.clone().with_limit(limit); + + Some(Arc::new(Self { + base_config: new_config, + projected_statistics: self.projected_statistics.clone(), + projected_schema: self.projected_schema.clone(), + projected_output_ordering: self.projected_output_ordering.clone(), + metrics: self.metrics.clone(), + cache: self.cache.clone(), + })) + } } pub struct ArrowOpener { diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 15ee0a1f7c22d..298d117252a1a 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -164,6 +164,19 @@ impl ExecutionPlan for AvroExec { fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + + fn with_fetch(&self, limit: Option) -> Option> { + let new_config = self.base_config.clone().with_limit(limit); + + Some(Arc::new(Self { + base_config: new_config, + projected_statistics: self.projected_statistics.clone(), + projected_schema: self.projected_schema.clone(), + projected_output_ordering: self.projected_output_ordering.clone(), + metrics: self.metrics.clone(), + cache: self.cache.clone(), + })) + } } #[cfg(feature = "avro")] diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index be437cfb94448..e9f7e5797cb0b 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -426,6 +426,24 @@ impl ExecutionPlan for CsvExec { fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + + fn with_fetch(&self, limit: Option) -> Option> { + let new_config = self.base_config.clone().with_limit(limit); + + Some(Arc::new(Self { + base_config: new_config, + projected_statistics: self.projected_statistics.clone(), + has_header: self.has_header, + delimiter: self.delimiter, + quote: self.quote, + escape: self.escape, + comment: self.comment, + newlines_in_values: self.newlines_in_values, + metrics: self.metrics.clone(), + file_compression_type: self.file_compression_type, + cache: self.cache.clone(), + })) + } } /// A Config for [`CsvOpener`] diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index c051b5d9b57d9..b3f4c995ac81a 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -206,6 +206,18 @@ impl 
ExecutionPlan for NdJsonExec { fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + + fn with_fetch(&self, limit: Option) -> Option> { + let new_config = self.base_config.clone().with_limit(limit); + + Some(Arc::new(Self { + base_config: new_config, + projected_statistics: self.projected_statistics.clone(), + metrics: self.metrics.clone(), + file_compression_type: self.file_compression_type, + cache: self.cache.clone(), + })) + } } /// A [`FileOpener`] that opens a JSON file and yields a [`FileOpenFuture`] diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 7f764059218cc..a5a7b50a008a6 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -726,6 +726,24 @@ impl ExecutionPlan for ParquetExec { fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } + + fn with_fetch(&self, limit: Option) -> Option> { + let new_config = self.base_config.clone().with_limit(limit); + + Some(Arc::new(Self { + base_config: new_config, + projected_statistics: self.projected_statistics.clone(), + metrics: self.metrics.clone(), + predicate: self.predicate.clone(), + pruning_predicate: self.pruning_predicate.clone(), + page_pruning_predicate: self.page_pruning_predicate.clone(), + metadata_size_hint: self.metadata_size_hint, + parquet_file_reader_factory: self.parquet_file_reader_factory.clone(), + cache: self.cache.clone(), + table_parquet_options: self.table_parquet_options.clone(), + schema_adapter_factory: self.schema_adapter_factory.clone(), + })) + } } fn should_enable_page_index( diff --git a/datafusion/core/src/physical_optimizer/limit_pushdown.rs b/datafusion/core/src/physical_optimizer/limit_pushdown.rs new file mode 100644 index 0000000000000..4379a34a94267 --- /dev/null +++ b/datafusion/core/src/physical_optimizer/limit_pushdown.rs @@ -0,0 +1,662 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`LimitPushdown`] pushes `LIMIT` down through `ExecutionPlan`s to reduce +//! data transfer as much as possible. 
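+//!
+//! As a small illustration (mirroring the tests in this module), a plan such as
+//!
+//! ```text
+//! GlobalLimitExec: skip=0, fetch=5
+//!   StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true
+//! ```
+//!
+//! is rewritten so that the source applies the limit itself:
+//!
+//! ```text
+//! StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5
+//! ```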
+ +use std::fmt::Debug; +use std::sync::Arc; + +use crate::error::Result; +use crate::physical_optimizer::PhysicalOptimizerRule; +use crate::physical_plan::ExecutionPlan; + +use datafusion_common::config::ConfigOptions; +use datafusion_common::plan_datafusion_err; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_optimizer::push_down_limit::combine_limit; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; + +/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from +/// the parent to the child if applicable. +#[derive(Default)] +pub struct LimitPushdown {} + +impl LimitPushdown { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl PhysicalOptimizerRule for LimitPushdown { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result> { + plan.transform_down(push_down_limits).data() + } + + fn name(&self) -> &str { + "LimitPushdown" + } + + fn schema_check(&self) -> bool { + true + } +} + +/// This enumeration makes `skip` and `fetch` calculations easier by providing +/// a single API for both local and global limit operators. +#[derive(Debug)] +enum LimitExec { + Global(GlobalLimitExec), + Local(LocalLimitExec), +} + +impl LimitExec { + fn input(&self) -> &Arc { + match self { + Self::Global(global) => global.input(), + Self::Local(local) => local.input(), + } + } + + fn fetch(&self) -> Option { + match self { + Self::Global(global) => global.fetch(), + Self::Local(local) => Some(local.fetch()), + } + } + + fn skip(&self) -> usize { + match self { + Self::Global(global) => global.skip(), + Self::Local(_) => 0, + } + } + + fn with_child(&self, child: Arc) -> Self { + match self { + Self::Global(global) => { + Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) + } + Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), + } + } +} + +impl From for Arc { + fn from(limit_exec: LimitExec) -> Self { + match limit_exec { + LimitExec::Global(global) => Arc::new(global), + LimitExec::Local(local) => Arc::new(local), + } + } +} + +/// Pushes down the limit through the plan. +pub fn push_down_limits( + plan: Arc, +) -> Result>> { + let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { + let child = limit_exec.input(); + if let Some(child_limit) = extract_limit(child) { + let merged = merge_limits(&limit_exec, &child_limit); + // Revisit current node in case of consecutive pushdowns + Some(push_down_limits(merged)?.data) + } else if child.supports_limit_pushdown() { + try_push_down_limit(&limit_exec, child.clone())? + } else { + add_fetch_to_child(&limit_exec, child.clone()) + } + } else { + None + }; + + Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) +} + +/// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a +/// [`GlobalLimitExec`] or a [`LocalLimitExec`]. +fn extract_limit(plan: &Arc) -> Option { + if let Some(global_limit) = plan.as_any().downcast_ref::() { + Some(LimitExec::Global(GlobalLimitExec::new( + global_limit.input().clone(), + global_limit.skip(), + global_limit.fetch(), + ))) + } else { + plan.as_any() + .downcast_ref::() + .map(|local_limit| { + LimitExec::Local(LocalLimitExec::new( + local_limit.input().clone(), + local_limit.fetch(), + )) + }) + } +} + +/// Merge the limits of the parent and the child. 
If at least one of them is a +/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, +/// the result is a [`LocalLimitExec`]. +fn merge_limits( + parent_limit_exec: &LimitExec, + child_limit_exec: &LimitExec, +) -> Arc { + // We can use the logic in `combine_limit` from the logical optimizer: + let (skip, fetch) = combine_limit( + parent_limit_exec.skip(), + parent_limit_exec.fetch(), + child_limit_exec.skip(), + child_limit_exec.fetch(), + ); + match (parent_limit_exec, child_limit_exec) { + (LimitExec::Local(_), LimitExec::Local(_)) => { + // The fetch is present in this case, can unwrap. + Arc::new(LocalLimitExec::new( + child_limit_exec.input().clone(), + fetch.unwrap(), + )) + } + _ => Arc::new(GlobalLimitExec::new( + child_limit_exec.input().clone(), + skip, + fetch, + )), + } +} + +/// Pushes down the limit through the child. If the child has a single input +/// partition, simply swaps the parent and the child. Otherwise, adds a +/// [`LocalLimitExec`] after in between in addition to swapping, because of +/// multiple input partitions. +fn try_push_down_limit( + limit_exec: &LimitExec, + child: Arc, +) -> Result>> { + let grandchildren = child.children(); + if let Some(&grandchild) = grandchildren.first() { + // GlobalLimitExec and LocalLimitExec must have an input after pushdown + if combines_input_partitions(&child) { + // We still need a LocalLimitExec after the child + if let Some(fetch) = limit_exec.fetch() { + let new_local_limit = Arc::new(LocalLimitExec::new( + grandchild.clone(), + fetch + limit_exec.skip(), + )); + let new_child = child.clone().with_new_children(vec![new_local_limit])?; + Ok(Some(limit_exec.with_child(new_child).into())) + } else { + Ok(None) + } + } else { + // Swap current with child + let new_limit = limit_exec.with_child(grandchild.clone()); + let new_child = child.clone().with_new_children(vec![new_limit.into()])?; + Ok(Some(new_child)) + } + } else { + // Operators supporting limit push down must have a child. + Err(plan_datafusion_err!( + "{:#?} must have a child to push down limit", + child + )) + } +} + +fn combines_input_partitions(exec: &Arc) -> bool { + let exec = exec.as_any(); + exec.is::() || exec.is::() +} + +/// Transforms child to the fetching version if supported. Removes the parent if +/// skip is zero. Otherwise, keeps the parent. 
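+///
+/// For example (mirroring the streaming table tests below), a
+/// `GlobalLimitExec: skip=2, fetch=5` over a `StreamingTableExec` becomes the
+/// same `GlobalLimitExec` over a `StreamingTableExec` with `fetch=7`, while
+/// with `skip=0` the limit operator is removed entirely and only `fetch=5`
+/// remains on the source.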
+fn add_fetch_to_child( + limit_exec: &LimitExec, + child: Arc, +) -> Option> { + let fetch = limit_exec.fetch(); + let skip = limit_exec.skip(); + + let child_fetch = fetch.map(|f| f + skip); + + if let Some(child_with_fetch) = child.with_fetch(child_fetch) { + if skip > 0 { + Some(limit_exec.with_child(child_with_fetch).into()) + } else { + Some(child_with_fetch) + } + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use arrow_schema::{DataType, Field, Schema, SchemaRef}; + use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::BinaryExpr; + use datafusion_physical_expr::Partitioning; + use datafusion_physical_expr_common::expressions::column::col; + use datafusion_physical_expr_common::expressions::lit; + use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; + use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; + use datafusion_physical_plan::empty::EmptyExec; + use datafusion_physical_plan::filter::FilterExec; + use datafusion_physical_plan::get_plan_string; + use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; + use datafusion_physical_plan::projection::ProjectionExec; + use datafusion_physical_plan::repartition::RepartitionExec; + use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; + + struct DummyStreamPartition { + schema: SchemaRef, + } + impl PartitionStream for DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } + } + + #[test] + fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 2, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = 
streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_batches = coalesce_batches_exec(filter); + let local_limit = local_limit_exec(coalesce_batches, 5); + let coalesce_partitions = coalesce_partitions_exec(local_limit); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let filter = filter_exec(schema.clone(), streaming_table)?; + let projection = projection_exec(schema.clone(), filter)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " GlobalLimitExec: skip=0, fetch=5", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone()).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema.clone(), coalesce_batches)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, 
fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( + ) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_partitions = coalesce_partitions_exec(filter); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_local_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_local_limit = local_limit_exec(empty_exec, 10); + let parent_local_limit = local_limit_exec(child_local_limit, 20); + + let initial = get_plan_string(&parent_local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " LocalLimitExec: fetch=10", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + + let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_global_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); + let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); + + let initial = get_plan_string(&parent_global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=10, fetch=20", + " GlobalLimitExec: skip=10, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_global_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let local_limit = local_limit_exec(empty_exec, 40); + let global_limit = global_limit_exec(local_limit, 20, Some(30)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=20, fetch=30", + " LocalLimitExec: fetch=40", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, 
&ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + #[test] + fn merges_local_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let global_limit = global_limit_exec(empty_exec, 20, Some(30)); + let local_limit = local_limit_exec(global_limit, 20); + + let initial = get_plan_string(&local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " GlobalLimitExec: skip=20, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) + } + + fn create_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, true), + Field::new("c3", DataType::Int32, true), + ])) + } + + fn streaming_table_exec(schema: SchemaRef) -> Result> { + Ok(Arc::new(StreamingTableExec::try_new( + schema.clone(), + vec![Arc::new(DummyStreamPartition { + schema: schema.clone(), + }) as _], + None, + None, + true, + None, + )?)) + } + + fn global_limit_exec( + input: Arc, + skip: usize, + fetch: Option, + ) -> Arc { + Arc::new(GlobalLimitExec::new(input, skip, fetch)) + } + + fn local_limit_exec( + input: Arc, + fetch: usize, + ) -> Arc { + Arc::new(LocalLimitExec::new(input, fetch)) + } + + fn projection_exec( + schema: SchemaRef, + input: Arc, + ) -> Result> { + Ok(Arc::new(ProjectionExec::try_new( + vec![ + (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), + (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), + (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), + ], + input, + )?)) + } + + fn filter_exec( + schema: SchemaRef, + input: Arc, + ) -> Result> { + Ok(Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("c3", schema.as_ref()).unwrap(), + Operator::Gt, + lit(0), + )), + input, + )?)) + } + + fn coalesce_batches_exec(input: Arc) -> Arc { + Arc::new(CoalesceBatchesExec::new(input, 8192)) + } + + fn coalesce_partitions_exec( + local_limit: Arc, + ) -> Arc { + Arc::new(CoalescePartitionsExec::new(local_limit)) + } + + fn repartition_exec( + streaming_table: Arc, + ) -> Result> { + Ok(Arc::new(RepartitionExec::try_new( + streaming_table, + Partitioning::RoundRobinBatch(8), + )?)) + } + + fn empty_exec(schema: SchemaRef) -> Arc { + Arc::new(EmptyExec::new(schema)) + } +} diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 7c508eeef8780..01ddab3ec97de 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -27,6 +27,7 @@ pub mod combine_partial_final_agg; pub mod enforce_distribution; pub mod enforce_sorting; pub mod join_selection; +pub mod limit_pushdown; pub mod limited_distinct_aggregation; pub mod optimizer; pub mod projection_pushdown; diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index 6449dbea0ddf0..e09d7b28bf5f2 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -28,6 +28,7 @@ use crate::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAgg use 
crate::physical_optimizer::enforce_distribution::EnforceDistribution; use crate::physical_optimizer::enforce_sorting::EnforceSorting; use crate::physical_optimizer::join_selection::JoinSelection; +use crate::physical_optimizer::limit_pushdown::LimitPushdown; use crate::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_optimizer::sanity_checker::SanityCheckPlan; @@ -98,6 +99,10 @@ impl PhysicalOptimizer { // are not present, the load of executors such as join or union will be // reduced by narrowing their input tables. Arc::new(ProjectionPushdown::new()), + // The LimitPushdown rule tries to push limits down as far as possible, + // replacing operators with fetching variants, or adding limits + // past operators that support limit pushdown. + Arc::new(LimitPushdown::new()), // The SanityCheckPlan rule checks whether the order and // distribution requirements of each node in the plan // is satisfied. It will also reject non-runnable query diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index f62a019eb9607..a2bdbe64aa430 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -243,17 +243,18 @@ async fn sort_preserving_merge() { // SortPreservingMergeExec (not a Sort which would compete // with the SortPreservingMergeExec for memory) &[ - "+---------------+-------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+-------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Limit: skip=0, fetch=10 |", - "| | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", - "| | TableScan: t projection=[a, b] |", - "| physical_plan | GlobalLimitExec: skip=0, fetch=10 |", - "| | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", - "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - "+---------------+-------------------------------------------------------------------------------------------------------------+", + "+---------------+---------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+---------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Limit: skip=0, fetch=10 |", + "| | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | GlobalLimitExec: skip=0, fetch=10 |", + "| | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", + "| | LocalLimitExec: fetch=10 |", + "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", + "| | |", + "+---------------+---------------------------------------------------------------------------------------------------------------+", ] ) .run() diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index cd2e0b6f5ba2e..79980f8fc9ec9 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -217,16 +217,17 @@ fn transformed_limit( }))) } -/// Combines two limits 
into a single +/// Computes the `skip` and `fetch` parameters of a single limit that would be +/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters. /// -/// Returns the combined limit `(skip, fetch)` +/// There are multiple cases to consider: /// -/// # Case 0: Parent and Child are disjoint. (`child_fetch <= skip`) +/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`). /// /// ```text /// Before merging: -/// |........skip........|---fetch-->| Parent Limit -/// |...child_skip...|---child_fetch-->| Child Limit +/// |........skip........|---fetch-->| Parent limit +/// |...child_skip...|---child_fetch-->| Child limit /// ``` /// /// After merging: @@ -234,23 +235,12 @@ fn transformed_limit( /// |.........(child_skip + skip).........| /// ``` /// -/// Before merging: -/// ```text -/// |...skip...|------------fetch------------>| Parent Limit -/// |...child_skip...|-------------child_fetch------------>| Child Limit -/// ``` -/// -/// After merging: -/// ```text -/// |....(child_skip + skip)....|---(child_fetch - skip)-->| -/// ``` -/// -/// # Case 1: Parent is beyond the range of Child. (`skip < child_fetch <= skip + fetch`) +/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`). /// /// Before merging: /// ```text -/// |...skip...|------------fetch------------>| Parent Limit -/// |...child_skip...|-------------child_fetch------------>| Child Limit +/// |...skip...|------------fetch------------>| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit /// ``` /// /// After merging: @@ -258,18 +248,19 @@ fn transformed_limit( /// |....(child_skip + skip)....|---(child_fetch - skip)-->| /// ``` /// -/// # Case 2: Parent is in the range of Child. (`skip + fetch < child_fetch`) +/// # Case 2: Parent is within child's range (`skip + fetch < child_fetch`). +/// /// Before merging: /// ```text -/// |...skip...|---fetch-->| Parent Limit -/// |...child_skip...|-------------child_fetch------------>| Child Limit +/// |...skip...|---fetch-->| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit /// ``` /// /// After merging: /// ```text /// |....(child_skip + skip)....|---fetch-->| /// ``` -fn combine_limit( +pub fn combine_limit( parent_skip: usize, parent_fetch: Option, child_skip: usize, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 8cb25827ff8fb..061c849971b2c 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -18,17 +18,18 @@ //! CoalesceBatchesExec combines small batches into larger batches for more efficient use of //! vectorized processing by upstream operators. 
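+//!
+//! When a fetch limit is set via `CoalesceBatchesExec::with_fetch`, buffering
+//! stops as soon as `fetch` rows have been collected; for example, in the tests
+//! below, ten 8-row input batches with `target_batch_size = 21` and
+//! `fetch = Some(10)` produce a single 10-row output batch.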
+use std::any::Any; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; -use arrow::compute::concat_batches; -use std::any::Any; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; +use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::Result; @@ -36,14 +37,20 @@ use datafusion_execution::TaskContext; use futures::stream::{Stream, StreamExt}; -/// CoalesceBatchesExec combines small batches into larger batches for more efficient use of -/// vectorized processing by upstream operators. +/// `CoalesceBatchesExec` combines small batches into larger batches for more +/// efficient use of vectorized processing by later operators. The operator +/// works by buffering batches until it collects `target_batch_size` rows. When +/// only a limited number of rows are necessary (specified by the `fetch` +/// parameter), the operator will stop buffering and return the final batch +/// once the number of collected rows reaches the `fetch` value. #[derive(Debug)] pub struct CoalesceBatchesExec { /// The input plan input: Arc, /// Minimum number of rows for coalesces batches target_batch_size: usize, + /// Maximum number of rows to fetch, `None` means fetching all rows + fetch: Option, /// Execution metrics metrics: ExecutionPlanMetricsSet, cache: PlanProperties, @@ -56,11 +63,18 @@ impl CoalesceBatchesExec { Self { input, target_batch_size, + fetch: None, metrics: ExecutionPlanMetricsSet::new(), cache, } } + /// Update fetch with the argument + pub fn with_fetch(mut self, fetch: Option) -> Self { + self.fetch = fetch; + self + } + /// The input plan pub fn input(&self) -> &Arc { &self.input @@ -94,8 +108,13 @@ impl DisplayAs for CoalesceBatchesExec { write!( f, "CoalesceBatchesExec: target_batch_size={}", - self.target_batch_size - ) + self.target_batch_size, + )?; + if let Some(fetch) = self.fetch { + write!(f, ", fetch={fetch}")?; + }; + + Ok(()) } } } @@ -131,10 +150,10 @@ impl ExecutionPlan for CoalesceBatchesExec { self: Arc, children: Vec>, ) -> Result> { - Ok(Arc::new(CoalesceBatchesExec::new( - Arc::clone(&children[0]), - self.target_batch_size, - ))) + Ok(Arc::new( + CoalesceBatchesExec::new(Arc::clone(&children[0]), self.target_batch_size) + .with_fetch(self.fetch), + )) } fn execute( @@ -146,8 +165,10 @@ impl ExecutionPlan for CoalesceBatchesExec { input: self.input.execute(partition, context)?, schema: self.input.schema(), target_batch_size: self.target_batch_size, + fetch: self.fetch, buffer: Vec::new(), buffered_rows: 0, + total_rows: 0, is_closed: false, baseline_metrics: BaselineMetrics::new(&self.metrics, partition), })) @@ -160,6 +181,16 @@ impl ExecutionPlan for CoalesceBatchesExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn with_fetch(&self, limit: Option) -> Option> { + Some(Arc::new(CoalesceBatchesExec { + input: Arc::clone(&self.input), + target_batch_size: self.target_batch_size, + fetch: limit, + metrics: self.metrics.clone(), + cache: self.cache.clone(), + })) + } } struct CoalesceBatchesStream { @@ -169,10 +200,14 @@ struct CoalesceBatchesStream { schema: SchemaRef, /// Minimum number of rows for coalesces batches target_batch_size: usize, + /// Maximum number of rows to fetch, `None` means 
fetching all rows + fetch: Option, /// Buffered batches buffer: Vec, /// Buffered row count buffered_rows: usize, + /// Total number of rows returned + total_rows: usize, /// Whether the stream has finished returning all of its data or not is_closed: bool, /// Execution metrics @@ -214,6 +249,29 @@ impl CoalesceBatchesStream { match input_batch { Poll::Ready(x) => match x { Some(Ok(batch)) => { + // Handle fetch limit: + if let Some(fetch) = self.fetch { + if self.total_rows + batch.num_rows() >= fetch { + // We have reached the fetch limit. + let remaining_rows = fetch - self.total_rows; + debug_assert!(remaining_rows > 0); + + self.is_closed = true; + self.total_rows = fetch; + // Trim the batch and add to buffered batches: + let batch = batch.slice(0, remaining_rows); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + // Combine buffered batches: + let batch = concat_batches(&self.schema, &self.buffer)?; + // Reset the buffer state and return final batch: + self.buffer.clear(); + self.buffered_rows = 0; + return Poll::Ready(Some(Ok(batch))); + } + } + self.total_rows += batch.num_rows(); + if batch.num_rows() >= self.target_batch_size && self.buffer.is_empty() { @@ -280,7 +338,7 @@ mod tests { let partition = create_vec_batches(&schema, 10); let partitions = vec![partition]; - let output_partitions = coalesce_batches(&schema, partitions, 21).await?; + let output_partitions = coalesce_batches(&schema, partitions, 21, None).await?; assert_eq!(1, output_partitions.len()); // input is 10 batches x 8 rows (80 rows) @@ -295,6 +353,86 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_concat_batches_with_fetch_larger_than_input_size() -> Result<()> { + let schema = test_schema(); + let partition = create_vec_batches(&schema, 10); + let partitions = vec![partition]; + + let output_partitions = + coalesce_batches(&schema, partitions, 21, Some(100)).await?; + assert_eq!(1, output_partitions.len()); + + // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 + // expected to behave the same as `test_concat_batches` + let batches = &output_partitions[0]; + assert_eq!(4, batches.len()); + assert_eq!(24, batches[0].num_rows()); + assert_eq!(24, batches[1].num_rows()); + assert_eq!(24, batches[2].num_rows()); + assert_eq!(8, batches[3].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn test_concat_batches_with_fetch_less_than_input_size() -> Result<()> { + let schema = test_schema(); + let partition = create_vec_batches(&schema, 10); + let partitions = vec![partition]; + + let output_partitions = + coalesce_batches(&schema, partitions, 21, Some(50)).await?; + assert_eq!(1, output_partitions.len()); + + // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 + let batches = &output_partitions[0]; + assert_eq!(3, batches.len()); + assert_eq!(24, batches[0].num_rows()); + assert_eq!(24, batches[1].num_rows()); + assert_eq!(2, batches[2].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn test_concat_batches_with_fetch_less_than_target_and_no_remaining_rows( + ) -> Result<()> { + let schema = test_schema(); + let partition = create_vec_batches(&schema, 10); + let partitions = vec![partition]; + + let output_partitions = + coalesce_batches(&schema, partitions, 21, Some(48)).await?; + assert_eq!(1, output_partitions.len()); + + // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 + let batches = &output_partitions[0]; + assert_eq!(2, batches.len()); + assert_eq!(24, batches[0].num_rows()); + assert_eq!(24, batches[1].num_rows()); + + 
Ok(()) + } + + #[tokio::test] + async fn test_concat_batches_with_fetch_less_target_batch_size() -> Result<()> { + let schema = test_schema(); + let partition = create_vec_batches(&schema, 10); + let partitions = vec![partition]; + + let output_partitions = + coalesce_batches(&schema, partitions, 21, Some(10)).await?; + assert_eq!(1, output_partitions.len()); + + // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 + let batches = &output_partitions[0]; + assert_eq!(1, batches.len()); + assert_eq!(10, batches[0].num_rows()); + + Ok(()) + } + fn test_schema() -> Arc { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } @@ -303,13 +441,15 @@ mod tests { schema: &SchemaRef, input_partitions: Vec>, target_batch_size: usize, + fetch: Option, ) -> Result>> { // create physical plan let exec = MemoryExec::try_new(&input_partitions, Arc::clone(schema), None)?; let exec = RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(1))?; - let exec: Arc = - Arc::new(CoalesceBatchesExec::new(Arc::new(exec), target_batch_size)); + let exec: Arc = Arc::new( + CoalesceBatchesExec::new(Arc::new(exec), target_batch_size).with_fetch(fetch), + ); // execute and collect results let output_partition_count = exec.output_partitioning().partition_count(); diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index ef6afee803079..486ae41901db3 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -174,6 +174,10 @@ impl ExecutionPlan for CoalescePartitionsExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn supports_limit_pushdown(&self) -> bool { + true + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index c834005bb7c3c..19554d07f7a0f 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -428,6 +428,22 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { fn statistics(&self) -> Result { Ok(Statistics::new_unknown(&self.schema())) } + + /// Returns `true` if a limit can be safely pushed down through this + /// `ExecutionPlan` node. + /// + /// If this method returns `true`, and the query plan contains a limit at + /// the output of this node, DataFusion will push the limit to the input + /// of this node. + fn supports_limit_pushdown(&self) -> bool { + false + } + + /// Returns a fetching variant of this `ExecutionPlan` node, if it supports + /// fetch limits. Returns `None` otherwise. 
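+    ///
+    /// For example, `SortExec`, `CoalesceBatchesExec` and `StreamingTableExec`
+    /// return a copy of themselves with the requested `fetch` applied, while
+    /// the default implementation returns `None`.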
+ fn with_fetch(&self, _limit: Option) -> Option> { + None + } } /// Extension trait provides an easy API to fetch various properties of diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 9efa0422ec759..d2bb8f2b0eadf 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -245,6 +245,10 @@ impl ExecutionPlan for ProjectionExec { Arc::clone(&self.schema), )) } + + fn supports_limit_pushdown(&self) -> bool { + true + } } /// If e is a direct column reference, returns the field level diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 5e82c6dab8fa1..eb77d7716848c 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -923,6 +923,17 @@ impl ExecutionPlan for SortExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn with_fetch(&self, limit: Option) -> Option> { + Some(Arc::new(SortExec { + input: Arc::clone(&self.input), + expr: self.expr.clone(), + metrics_set: self.metrics_set.clone(), + preserve_partitioning: self.preserve_partitioning, + fetch: limit, + cache: self.cache.clone(), + })) + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 41dfd449dd829..0fedfb6296e75 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -264,6 +264,10 @@ impl ExecutionPlan for SortPreservingMergeExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn supports_limit_pushdown(&self) -> bool { + true + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index e10e5c9a6995a..f3cca4bfbe174 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -264,6 +264,19 @@ impl ExecutionPlan for StreamingTableExec { fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + + fn with_fetch(&self, limit: Option) -> Option> { + Some(Arc::new(StreamingTableExec { + partitions: self.partitions.clone(), + projection: self.projection.clone(), + projected_schema: Arc::clone(&self.projected_schema), + projected_output_ordering: self.projected_output_ordering.clone(), + infinite: self.infinite, + limit, + cache: self.cache.clone(), + metrics: self.metrics.clone(), + })) + } } #[cfg(test)] diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 172cbad44dca4..5a17334601208 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -79,10 +79,7 @@ logical_plan 01)Limit: skip=0, fetch=10 02)--Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10 03)----TableScan: aggregate_test_100_with_order projection=[c1] -physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true - +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv]]}, projection=[c1], limit=10, output_ordering=[c1@0 ASC NULLS LAST], has_header=true ## explain_physical_plan_only @@ -257,6 +254,7 @@ physical_plan after coalesce_batches 
SAME TEXT AS ABOVE physical_plan after OutputRequirements CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true physical_plan after LimitAggregation SAME TEXT AS ABOVE physical_plan after ProjectionPushdown SAME TEXT AS ABOVE +physical_plan after LimitPushdown SAME TEXT AS ABOVE physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true physical_plan_with_stats CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] @@ -290,9 +288,7 @@ set datafusion.explain.physical_plan_only = true; query TT EXPLAIN SELECT a, b, c FROM simple_explain_test limit 10; ---- -physical_plan -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Inexact(10), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] -02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], limit=10, has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], limit=10, has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] # Parquet scan with statistics collected statement ok @@ -304,9 +300,7 @@ CREATE EXTERNAL TABLE alltypes_plain STORED AS PARQUET LOCATION '../../parquet-t query TT EXPLAIN SELECT * FROM alltypes_plain limit 10; ---- -physical_plan -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] # explain verbose with both collect & show statistics on query TT @@ -336,13 +330,10 @@ physical_plan after OutputRequirements 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] physical_plan after LimitAggregation SAME TEXT AS ABOVE physical_plan after ProjectionPushdown SAME TEXT AS ABOVE +physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, 
projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] physical_plan after SanityCheckPlan SAME TEXT AS ABOVE -physical_plan -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok @@ -379,16 +370,11 @@ physical_plan after OutputRequirements 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 physical_plan after LimitAggregation SAME TEXT AS ABOVE physical_plan after ProjectionPushdown SAME TEXT AS ABOVE +physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 physical_plan after SanityCheckPlan SAME TEXT AS 
ABOVE -physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 -physical_plan_with_stats -01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 +physical_plan_with_stats ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index b7d466d8bf827..a97c979c43a34 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4232,12 +4232,13 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [time_chunks@0 DESC], fetch=5 03)----ProjectionExec: expr=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as time_chunks] 
-04)------AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted -05)--------CoalesceBatchesExec: target_batch_size=2 -06)----------RepartitionExec: partitioning=Hash([date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0], 8), input_partitions=8, preserve_order=true, sort_exprs=date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 DESC -07)------------AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@0) as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted -08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -09)----------------StreamingTableExec: partition_sizes=1, projection=[ts], infinite_source=true, output_ordering=[ts@0 DESC] +04)------LocalLimitExec: fetch=5 +05)--------AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted +06)----------CoalesceBatchesExec: target_batch_size=2 +07)------------RepartitionExec: partitioning=Hash([date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0], 8), input_partitions=8, preserve_order=true, sort_exprs=date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 DESC +08)--------------AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@0) as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted +09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +10)------------------StreamingTableExec: partition_sizes=1, projection=[ts], infinite_source=true, output_ordering=[ts@0 DESC] query P SELECT date_bin('15 minutes', ts) as time_chunks @@ -4334,8 +4335,9 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [name@0 DESC,time_chunks@1 DESC], fetch=5 03)----ProjectionExec: expr=[name@0 as name, date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@1) as time_chunks] -04)------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -05)--------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] +04)------LocalLimitExec: fetch=5 +05)--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +06)----------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] statement ok drop table t1 diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 8de8c478fbc41..97130201fca80 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -57,7 +57,7 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5 -03)----CoalesceBatchesExec: target_batch_size=8192 +03)----CoalesceBatchesExec: target_batch_size=8192, fetch=5 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1] 05)--------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true 06)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -100,7 +100,7 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=10 02)--SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 03)----ProjectionExec: expr=[a@0 as a2, b@1 as b] -04)------CoalesceBatchesExec: target_batch_size=8192 +04)------CoalesceBatchesExec: target_batch_size=8192, fetch=10 05)--------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true 07)----------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 094017c383a67..dc3d444854c4d 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -372,7 +372,7 @@ physical_plan 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 05)--------ProjectionExec: expr=[] 06)----------GlobalLimitExec: skip=6, fetch=3 -07)------------CoalesceBatchesExec: target_batch_size=8192 +07)------------CoalesceBatchesExec: target_batch_size=8192, fetch=9 08)--------------FilterExec: a@0 > 3 09)----------------MemoryExec: partitions=1, partition_sizes=[1] @@ -546,9 +546,7 @@ explain SELECT * FROM data LIMIT 3; logical_plan 01)Limit: skip=0, fetch=3 02)--TableScan: data projection=[column1, column2], fetch=3 -physical_plan -01)GlobalLimitExec: skip=0, fetch=3 -02)--StreamingTableExec: partition_sizes=1, projection=[column1, column2], infinite_source=true, fetch=3, output_ordering=[column1@0 ASC NULLS LAST, column2@1 ASC NULLS LAST] +physical_plan StreamingTableExec: partition_sizes=1, projection=[column1, column2], infinite_source=true, fetch=3, output_ordering=[column1@0 ASC NULLS LAST, column2@1 ASC NULLS LAST] statement ok diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index d0a6d6adc1079..3382d5ddabda4 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -1037,8 +1037,9 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [c_bigint@0 ASC NULLS LAST], fetch=5 03)----ProjectionExec: expr=[CAST(c@0 AS Int64) as c_bigint] -04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +04)------LocalLimitExec: fetch=5 +05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true statement ok drop table ordered_table; @@ -1111,8 +1112,9 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], fetch=5 03)----ProjectionExec: expr=[abs(c@0) as abs_c] -04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +04)------LocalLimitExec: 
fetch=5 +05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true # Boolean to integer casts preserve the order. statement ok diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt index 8d6bceb717483..e3c204a4f4ce9 100644 --- a/datafusion/sqllogictest/test_files/repartition.slt +++ b/datafusion/sqllogictest/test_files/repartition.slt @@ -123,7 +123,7 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--CoalescePartitionsExec -03)----CoalesceBatchesExec: target_batch_size=8192 +03)----CoalesceBatchesExec: target_batch_size=8192, fetch=5 04)------FilterExec: c3@2 > 0 05)--------RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1 06)----------StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index a5f31cb9b4664..f217cbab074ff 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1121,9 +1121,7 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5 03)----TableScan: annotated_data_finite2 projection=[a] -physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], limit=5, output_ordering=[a@0 ASC NULLS LAST], has_header=true query I SELECT a FROM annotated_data_finite2 diff --git a/datafusion/sqllogictest/test_files/topk.slt b/datafusion/sqllogictest/test_files/topk.slt index 616794f849188..c38b9d1f1a39f 100644 --- a/datafusion/sqllogictest/test_files/topk.slt +++ b/datafusion/sqllogictest/test_files/topk.slt @@ -80,9 +80,8 @@ logical_plan 02)--Sort: aggregate_test_100.c13 DESC NULLS FIRST, fetch=5 03)----TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[c13@12 DESC], preserve_partitioning=[false] -03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true +01)SortExec: TopK(fetch=5), expr=[c13@12 DESC], preserve_partitioning=[false] +02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true diff --git a/datafusion/sqllogictest/test_files/tpch/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/q11.slt.part index 24ecd9386d3b7..ce989ee33ebdc 100644 --- a/datafusion/sqllogictest/test_files/tpch/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q11.slt.part @@ -74,59 +74,58 @@ logical_plan 25)--------------------Filter: nation.n_name = Utf8("GERMANY") 26)----------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortExec: TopK(fetch=10), expr=[value@1 DESC], 
preserve_partitioning=[false] -03)----ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] -04)------NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 -05)--------CoalescePartitionsExec -06)----------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -07)------------CoalesceBatchesExec: target_batch_size=8192 -08)--------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -09)----------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[ps_partkey@0, ps_availqty@1, ps_supplycost@2] -12)----------------------CoalesceBatchesExec: target_batch_size=8192 -13)------------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 -14)--------------------------CoalesceBatchesExec: target_batch_size=8192 -15)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_availqty@2, ps_supplycost@3, s_nationkey@5] -16)------------------------------CoalesceBatchesExec: target_batch_size=8192 -17)--------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 -18)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], has_header=false -19)------------------------------CoalesceBatchesExec: target_batch_size=8192 -20)--------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -21)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -22)------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -23)----------------------CoalesceBatchesExec: target_batch_size=8192 -24)------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -25)--------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] -26)----------------------------CoalesceBatchesExec: target_batch_size=8192 -27)------------------------------FilterExec: n_name@1 = GERMANY -28)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -29)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false -30)--------ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as 
sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] -31)----------AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -32)------------CoalescePartitionsExec -33)--------------AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -34)----------------CoalesceBatchesExec: target_batch_size=8192 -35)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] -36)--------------------CoalesceBatchesExec: target_batch_size=8192 -37)----------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 -38)------------------------CoalesceBatchesExec: target_batch_size=8192 -39)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], projection=[ps_availqty@1, ps_supplycost@2, s_nationkey@4] -40)----------------------------CoalesceBatchesExec: target_batch_size=8192 -41)------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 -42)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false -43)----------------------------CoalesceBatchesExec: target_batch_size=8192 -44)------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -45)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -46)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -47)--------------------CoalesceBatchesExec: target_batch_size=8192 -48)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -49)------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] -50)--------------------------CoalesceBatchesExec: target_batch_size=8192 -51)----------------------------FilterExec: n_name@1 = GERMANY -52)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -53)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +01)SortExec: TopK(fetch=10), expr=[value@1 DESC], preserve_partitioning=[false] +02)--ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] +03)----NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 +04)------CoalescePartitionsExec +05)--------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 
+08)--------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +09)----------------CoalesceBatchesExec: target_batch_size=8192 +10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[ps_partkey@0, ps_availqty@1, ps_supplycost@2] +11)--------------------CoalesceBatchesExec: target_batch_size=8192 +12)----------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 +13)------------------------CoalesceBatchesExec: target_batch_size=8192 +14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_availqty@2, ps_supplycost@3, s_nationkey@5] +15)----------------------------CoalesceBatchesExec: target_batch_size=8192 +16)------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 +17)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], has_header=false +18)----------------------------CoalesceBatchesExec: target_batch_size=8192 +19)------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +20)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +21)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +22)--------------------CoalesceBatchesExec: target_batch_size=8192 +23)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +24)------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +25)--------------------------CoalesceBatchesExec: target_batch_size=8192 +26)----------------------------FilterExec: n_name@1 = GERMANY +27)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +28)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +29)------ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] +30)--------AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +31)----------CoalescePartitionsExec +32)------------AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +33)--------------CoalesceBatchesExec: target_batch_size=8192 +34)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] +35)------------------CoalesceBatchesExec: target_batch_size=8192 +36)--------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 
+37)----------------------CoalesceBatchesExec: target_batch_size=8192 +38)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], projection=[ps_availqty@1, ps_supplycost@2, s_nationkey@4] +39)--------------------------CoalesceBatchesExec: target_batch_size=8192 +40)----------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 +41)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false +42)--------------------------CoalesceBatchesExec: target_batch_size=8192 +43)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +44)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +45)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +46)------------------CoalesceBatchesExec: target_batch_size=8192 +47)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +48)----------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +49)------------------------CoalesceBatchesExec: target_batch_size=8192 +50)--------------------------FilterExec: n_name@1 = GERMANY +51)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +52)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 2dc8385bf191f..126996e7398aa 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -406,15 +406,16 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [c9@1 DESC], fetch=5 -03)----UnionExec -04)------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] -05)--------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true -08)------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] -09)--------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9] -10)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true +03)----LocalLimitExec: fetch=5 +04)------UnionExec +05)--------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] +06)----------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true +09)--------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] +10)----------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9] +11)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true query TI SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index e9d417c93a575..212daa05a5d21 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1825,15 +1825,16 @@ physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 03)----ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] -06)----------CoalesceBatchesExec: target_batch_size=4096 -07)------------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 -08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -09)----------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -10)------------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -11)--------------------SortExec: 
expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST], preserve_partitioning=[false] -12)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true +04)------LocalLimitExec: fetch=5 +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)----------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] +07)------------CoalesceBatchesExec: target_batch_size=4096 +08)--------------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 +09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +10)------------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +11)--------------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +12)----------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true @@ -2051,9 +2052,8 @@ physical_plan 03)----CoalescePartitionsExec 04)------AggregateExec: mode=Partial, gby=[], aggr=[array_agg(aggregate_test_100.c13)] 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------GlobalLimitExec: skip=0, fetch=1 -07)------------SortExec: TopK(fetch=1), expr=[c13@0 ASC NULLS LAST], preserve_partitioning=[false] -08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c13], has_header=true +06)----------SortExec: TopK(fetch=1), expr=[c13@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c13], has_header=true query ? 
@@ -2111,16 +2111,15 @@ logical_plan 08)--------------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] 09)----------------TableScan: aggregate_test_100 projection=[c1, c2, c8, c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -05)--------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c9@3 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING] -06)----------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, 
aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -08)--------------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -09)----------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] -10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true +01)SortExec: TopK(fetch=5), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 
PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +04)------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c9@3 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING] +05)--------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] +06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +07)------------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] +08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] +09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true @@ -2215,14 +2214,13 @@ logical_plan 08)--------------TableScan: aggregate_test_100 projection=[c1, c2, c9, c12] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----SortExec: TopK(fetch=5), expr=[c9@2 ASC NULLS LAST], preserve_partitioning=[false] -04)------ProjectionExec: expr=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum1, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING@4 as sum2, c9@1 as c9] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)), is_causal: false }], mode=[Sorted] -06)----------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING] -07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] -09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], has_header=true +02)--SortExec: TopK(fetch=5), expr=[c9@2 ASC NULLS LAST], preserve_partitioning=[false] +03)----ProjectionExec: expr=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum1, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING@4 as sum2, c9@1 as c9] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 
PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)), is_causal: false }], mode=[Sorted] +05)--------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING] +06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +07)------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], has_header=true query RR SELECT SUM(c12) OVER(ORDER BY c1, c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as sum1, @@ -2254,8 +2252,8 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2293,8 +2291,8 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW@1 as rn1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2332,12 +2330,11 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[rn1@1 DESC], preserve_partitioning=[false] -03)----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -04)------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true +01)SortExec: TopK(fetch=5), expr=[rn1@1 DESC], preserve_partitioning=[false] +02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true query II SELECT c9, rn1 FROM (SELECT c9, @@ -2375,12 +2372,11 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW@1 as rn1] -04)------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true +01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true query II SELECT c9, rn1 FROM (SELECT c9, @@ -2429,8 +2425,8 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2453,8 +2449,8 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c5, 
c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] +01)ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[CAST(c9@1 AS Int32) + c5@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5, c9], has_header=true @@ -2476,8 +2472,8 @@ logical_plan 05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c9@0 as c9, CAST(ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 AS Int64) as rn1] +01)ProjectionExec: expr=[c9@0 as c9, CAST(ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 AS Int64) as rn1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2582,15 +2578,14 @@ logical_plan 10)------------------TableScan: annotated_data_finite projection=[ts, inc_col, desc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, sum3@2 as sum3, min1@3 as min1, min2@4 as min2, min3@5 as min3, max1@6 as max1, max2@7 as max2, max3@8 as max3, cnt1@9 as cnt1, cnt2@10 as cnt2, sumr1@11 as sumr1, sumr2@12 as sumr2, sumr3@13 as sumr3, minr1@14 as minr1, minr2@15 as minr2, minr3@16 as minr3, maxr1@17 as maxr1, maxr2@18 as maxr2, maxr3@19 as maxr3, cntr1@20 as cntr1, cntr2@21 as cntr2, sum4@22 as sum4, cnt3@23 as cnt3] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] -04)------ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, 
sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ROWS 
BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE 
BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS 
BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", 
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -08)--------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] -09)----------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] -10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true +02)--SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] +03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, 
MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC 
NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, 
MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] +08)--------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] +09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIIIIIIIIIIII SELECT @@ -2670,12 +2665,11 @@ logical_plan 05)--------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, 
LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 06)----------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] -03)----ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING 
AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] -04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: 
Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), 
end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true +01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] +02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING 
AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] +03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: 
Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS 
LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] +05)--------CsvExec: file_groups={1 
group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIIIIIIIIIIIII SELECT @@ -2744,13 +2738,12 @@ logical_plan 08)--------------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as min2, max1@4 as max1, max2@5 as max2, count1@6 as count1, count2@7 as count2, avg1@8 as avg1, avg2@9 as avg2] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] -04)------ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE 
BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] -07)------------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] -08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true +02)--SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] +03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] 
RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: 
"avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] +06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] +07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIIIIIIRR SELECT @@ -2798,12 +2791,11 @@ logical_plan 07)------------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[first_value1@0 as first_value1, first_value2@1 as first_value2, last_value1@2 as last_value1, last_value2@3 as last_value2, nth_value1@4 as nth_value1] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----SortExec: TopK(fetch=5), expr=[inc_col@5 ASC NULLS LAST], preserve_partitioning=[false] -04)------ProjectionExec: expr=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as first_value1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as first_value2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as last_value1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as last_value2, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as nth_value1, inc_col@1 as inc_col] -05)--------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { 
units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] -07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true +02)--SortExec: TopK(fetch=5), expr=[inc_col@5 ASC NULLS LAST], preserve_partitioning=[false] +03)----ProjectionExec: expr=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as first_value1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as first_value2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as last_value1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as last_value2, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as nth_value1, inc_col@1 as inc_col] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS 
BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIII SELECT @@ -2845,8 +2837,8 @@ logical_plan 08)--------------TableScan: annotated_data_infinite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] +02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] +03)----GlobalLimitExec: skip=0, fetch=5 04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] @@ -2892,8 +2884,8 @@ logical_plan 08)--------------TableScan: annotated_data_infinite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] +02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY 
[annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] +03)----GlobalLimitExec: skip=0, fetch=5 04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] @@ -3061,22 +3053,21 @@ logical_plan 10)------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS __common_expr_1, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d 11)--------------------TableScan: annotated_data_finite2 projection=[a, b, c, d] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, 
annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), 
is_causal: true }], mode=[Sorted] -05)--------SortExec: expr=[d@4 ASC NULLS LAST,a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] -08)--------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -09)----------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] -10)------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] 
ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -11)--------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] -12)----------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] -13)------------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] -14)--------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY 
[annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -15)----------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] -16)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +01)SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 
CURRENT ROW AND 1 FOLLOWING@14 as sum12] +03)----BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), is_causal: true }], mode=[Sorted] +04)------SortExec: expr=[d@4 ASC NULLS LAST,a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +06)----------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] 
ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +08)--------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +10)------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_finite2.c) 
PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] +12)----------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +14)--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] +15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIII SELECT a, b, c, @@ -3138,8 +3129,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=4096 02)--FilterExec: rn1@5 < 50 -03)----GlobalLimitExec: skip=0, fetch=5 -04)------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] +03)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] +04)------GlobalLimitExec: skip=0, fetch=5 05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 06)----------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS 
LAST] @@ -3162,12 +3153,11 @@ logical_plan 05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST,c9@0 DESC], preserve_partitioning=[false] -03)----ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true +01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST,c9@0 DESC], preserve_partitioning=[false] +02)--ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true # Query below should work when its input is unbounded # because ordering of ROW_NUMBER, RANK result is added to the ordering equivalence @@ -3349,15 +3339,14 @@ logical_plan 06)----------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 07)------------TableScan: aggregate_test_100 projection=[c3, c11, c12] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortExec: TopK(fetch=5), expr=[c3@0 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[c3@0 as c3, MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] -04)------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] -06)----------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] -07)------------WindowAggExec: wdw=[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -08)--------------SortExec: expr=[c11@1 ASC NULLS LAST], preserve_partitioning=[false] -09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c11, c12], has_header=true +01)SortExec: TopK(fetch=5), expr=[c3@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[c3@0 as c3, MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] +03)----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] +05)--------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] +06)----------WindowAggExec: wdw=[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +07)------------SortExec: expr=[c11@1 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c11, c12], has_header=true # window1 spec is used multiple times under different aggregations. # The query should still work. 
@@ -3395,12 +3384,11 @@ logical_plan 06)----------TableScan: aggregate_test_100 projection=[c3, c12] physical_plan 01)ProjectionExec: expr=[min1@0 as min1, max1@1 as max1] -02)--GlobalLimitExec: skip=0, fetch=5 -03)----SortExec: TopK(fetch=5), expr=[c3@2 ASC NULLS LAST], preserve_partitioning=[false] -04)------ProjectionExec: expr=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3] -05)--------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] -07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c12], has_header=true +02)--SortExec: TopK(fetch=5), expr=[c3@2 ASC NULLS LAST], preserve_partitioning=[false] +03)----ProjectionExec: expr=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3] +04)------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c12], has_header=true # window2 spec is not defined statement 
error DataFusion error: Error during planning: The window window2 is not defined! @@ -3593,8 +3581,8 @@ logical_plan 04)------WindowAggr: windowExpr=[[NTH_VALUE(multiple_ordered_table.c, Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 05)--------TableScan: multiple_ordered_table projection=[c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--ProjectionExec: expr=[c@0 as c, NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as nv1] +01)ProjectionExec: expr=[c@0 as c, NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as nv1] +02)--GlobalLimitExec: skip=0, fetch=5 03)----WindowAggExec: wdw=[NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int32(NULL)), is_causal: false }] 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true From 823a00757711993e12d3602543bf2da7ecfd7442 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 28 Jul 2024 19:56:46 -0400 Subject: [PATCH 164/357] Update to arrow/parquet `52.2.0` (#11691) * Update to arrow/parquet 52.2.0 * Update datafuion-cli Cargo.lock * Update to avoid deprecated feature --- Cargo.toml | 18 ++-- datafusion-cli/Cargo.lock | 97 +++++++++---------- datafusion-cli/Cargo.toml | 4 +- .../src/datasource/file_format/parquet.rs | 2 +- 4 files changed, 60 insertions(+), 61 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cdf3d2f93b93e..90aff3f715cab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,22 +66,22 @@ version = "40.0.0" ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } -arrow = { version = "52.1.0", features = [ +arrow = { version = "52.2.0", features = [ "prettyprint", ] } -arrow-array = { version = "52.1.0", default-features = false, features = [ +arrow-array = { version = "52.2.0", default-features = false, features = [ "chrono-tz", ] } -arrow-buffer = { version = "52.1.0", default-features = false } -arrow-flight = { version = "52.1.0", features = [ +arrow-buffer = { version = "52.2.0", default-features = false } +arrow-flight = { version = "52.2.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "52.1.0", default-features = false, features = [ +arrow-ipc = { version = "52.2.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "52.1.0", default-features = false } -arrow-schema = { version = "52.1.0", default-features = false } -arrow-string = { version = "52.1.0", default-features = false } +arrow-ord = { version = "52.2.0", default-features = false } +arrow-schema = { version = "52.2.0", default-features = false } +arrow-string = { version = "52.2.0", default-features = false } async-trait = "0.1.73" bigdecimal = "=0.4.1" bytes = "1.4" @@ -118,7 +118,7 @@ log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.10.1", default-features = 
false } parking_lot = "0.12" -parquet = { version = "52.1.0", default-features = false, features = [ +parquet = { version = "52.2.0", default-features = false, features = [ "arrow", "async", "object_store", diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 89ea13cf5bc7c..e2851cfb40576 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "apache-avro" @@ -130,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6127ea5e585a12ec9f742232442828ebaf264dfa5eefdd71282376c599562b77" +checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" dependencies = [ "arrow-arith", "arrow-array", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7add7f39210b7d726e2a8efc0083e7bf06e8f2d15bdb4896b564dce4410fbf5d" +checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" dependencies = [ "arrow-array", "arrow-buffer", @@ -166,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81c16ec702d3898c2f5cfdc148443c6cd7dbe5bac28399859eb0a3d38f072827" +checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" dependencies = [ "ahash", "arrow-buffer", @@ -183,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cae6970bab043c4fbc10aee1660ceb5b306d0c42c8cc5f6ae564efcd9759b663" +checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" dependencies = [ "bytes", "half", @@ -194,9 +194,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c7ef44f26ef4f8edc392a048324ed5d757ad09135eff6d5509e6450d39e0398" +checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f843490bd258c5182b66e888161bb6f198f49f3792f7c7f98198b924ae0f564" +checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,9 +234,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a769666ffac256dd301006faca1ca553d0ae7cffcf4cd07095f73f95eb226514" +checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -246,9 +246,9 @@ dependencies = [ [[package]] name 
= "arrow-ipc" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf9c3fb57390a1af0b7bb3b5558c1ee1f63905f3eccf49ae7676a8d1e6e5a72" +checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -261,9 +261,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "654e7f3724176b66ddfacba31af397c48e106fbe4d281c8144e7d237df5acfd7" +checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" dependencies = [ "arrow-array", "arrow-buffer", @@ -281,9 +281,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8008370e624e8e3c68174faaf793540287106cfda8ad1da862fdc53d8e096b4" +checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" dependencies = [ "arrow-array", "arrow-buffer", @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca5e3a6b7fda8d9fe03f3b18a2d946354ea7f3c8e4076dbdb502ad50d9d44824" +checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" dependencies = [ "ahash", "arrow-array", @@ -306,20 +306,19 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.14.5", ] [[package]] name = "arrow-schema" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab1c12b40e29d9f3b699e0203c2a73ba558444c05e388a4377208f8f9c97eee" +checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" [[package]] name = "arrow-select" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e80159088ffe8c48965cb9b1a7c968b2729f29f37363df7eca177fc3281fe7c3" +checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" dependencies = [ "ahash", "arrow-array", @@ -331,9 +330,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "52.1.0" +version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd04a6ea7de183648edbcb7a6dd925bbd04c210895f6384c780e27a9b54afcd" +checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" dependencies = [ "arrow-array", "arrow-buffer", @@ -348,9 +347,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.14" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" dependencies = [ "anstyle", "bstr", @@ -815,9 +814,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" dependencies = [ "memchr", "regex-automata", @@ -2621,9 +2620,9 @@ dependencies = [ [[package]] name = "parquet" -version = "52.1.0" +version = "52.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f22ba0d95db56dde8685e3fadcb915cdaadda31ab8abbe3ff7f0ad1ef333267" +checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" dependencies = [ "ahash", "arrow-array", @@ -2776,9 +2775,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "predicates" -version = "3.1.0" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" dependencies = [ "anstyle", "difflib", @@ -2790,15 +2789,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" +checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" [[package]] name = "predicates-tree" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" dependencies = [ "predicates-core", "termtree", @@ -3703,9 +3702,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.1" +version = "1.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" +checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" dependencies = [ "backtrace", "bytes", @@ -3958,9 +3957,9 @@ dependencies = [ [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vsimd" @@ -4370,9 +4369,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.11+zstd.1.5.6" +version = "2.0.12+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75652c55c0b6f3e6f12eb786fe1bc960396bf05a1eb3bf1f3691c3610ac2e6d4" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" dependencies = [ "cc", "pkg-config", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 860dc123fa94a..ff6673dc97273 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -30,7 +30,7 @@ rust-version = "1.76" readme = "README.md" [dependencies] -arrow = { version = "52.1.0" } +arrow = { version = "52.2.0" } async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" @@ -51,7 +51,7 @@ futures = "0.3" mimalloc = { version = "0.1", default-features = false } object_store = { version = "0.10.1", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } -parquet = { version = "52.1.0", default-features = false } +parquet = { version = "52.2.0", default-features = false } regex = "1.8" rustyline = "11.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs 
b/datafusion/core/src/datasource/file_format/parquet.rs index 3250b59fa1d13..b50e9389ad9ec 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -1321,7 +1321,7 @@ mod tests { .map(|i| i.to_string()) .collect(); let coll: Vec<_> = schema - .all_fields() + .flattened_fields() .into_iter() .map(|i| i.name().to_string()) .collect(); From 75ced386055d38ba2719bc1dd4064fa0e87ec1c2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 29 Jul 2024 06:51:08 -0400 Subject: [PATCH 165/357] Minor: make field names match metric name (#11478) --- datafusion/physical-plan/src/repartition/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 4870e9e95eb54..f09324c4019cf 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -415,7 +415,7 @@ struct RepartitionMetrics { /// Time in nanos to execute child operator and fetch batches fetch_time: metrics::Time, /// Time in nanos to perform repartitioning - repartition_time: metrics::Time, + repart_time: metrics::Time, /// Time in nanos for sending resulting batches to channels. /// /// One metric per output partition. @@ -449,7 +449,7 @@ impl RepartitionMetrics { Self { fetch_time, - repartition_time: repart_time, + repart_time, send_time, } } @@ -775,7 +775,7 @@ impl RepartitionExec { context: Arc, ) -> Result<()> { let mut partitioner = - BatchPartitioner::try_new(partitioning, metrics.repartition_time.clone())?; + BatchPartitioner::try_new(partitioning, metrics.repart_time.clone())?; // execute the child operator let timer = metrics.fetch_time.timer(); From c50fd88faf40a0e3f991e1035deeca6d1e283f42 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 29 Jul 2024 06:57:34 -0400 Subject: [PATCH 166/357] Rename `ColumnOptions` to `ParquetColumnOptions` (#11512) * Rename `ColumnOptions` to `ParquetColumnOptions` * Update error message --- datafusion/common/src/config.rs | 4 +- .../common/src/file_options/parquet_writer.rs | 10 ++-- .../proto/datafusion_common.proto | 8 ++-- datafusion/proto-common/src/from_proto/mod.rs | 10 ++-- datafusion/proto-common/src/to_proto/mod.rs | 9 ++-- .../src/generated/datafusion_proto_common.rs | 48 ++++++++++--------- datafusion/sqllogictest/test_files/copy.slt | 2 +- 7 files changed, 50 insertions(+), 41 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 8af71d5abbb36..2b932b26cad68 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1414,7 +1414,7 @@ pub struct TableParquetOptions { /// Global Parquet options that propagates to all columns. pub global: ParquetOptions, /// Column specific options. Default usage is parquet.XX::column. - pub column_specific_options: HashMap, + pub column_specific_options: HashMap, /// Additional file-level metadata to include. Inserted into the key_value_metadata /// for the written [`FileMetaData`](https://docs.rs/parquet/latest/parquet/file/metadata/struct.FileMetaData.html). /// @@ -1555,7 +1555,7 @@ config_namespace_with_hashmap! { /// Options controlling parquet format for individual columns. /// /// See [`ParquetOptions`] for more details - pub struct ColumnOptions { + pub struct ParquetColumnOptions { /// Sets if bloom filter is enabled for the column path. 
pub bloom_filter_enabled: Option, default = None diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 34b7379823f87..80b751858398d 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -379,7 +379,7 @@ mod tests { }; use std::collections::HashMap; - use crate::config::{ColumnOptions, ParquetOptions}; + use crate::config::{ParquetColumnOptions, ParquetOptions}; use super::*; @@ -388,8 +388,8 @@ mod tests { /// Take the column defaults provided in [`ParquetOptions`], and generate a non-default col config. fn column_options_with_non_defaults( src_col_defaults: &ParquetOptions, - ) -> ColumnOptions { - ColumnOptions { + ) -> ParquetColumnOptions { + ParquetColumnOptions { compression: Some("zstd(22)".into()), dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), statistics_enabled: Some("none".into()), @@ -446,10 +446,10 @@ mod tests { fn extract_column_options( props: &WriterProperties, col: ColumnPath, - ) -> ColumnOptions { + ) -> ParquetColumnOptions { let bloom_filter_default_props = props.bloom_filter_properties(&col); - ColumnOptions { + ParquetColumnOptions { bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), encoding: props.encoding(&col).map(|s| s.to_string()), dictionary_enabled: Some(props.dictionary_enabled(&col)), diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 85983dddf6ae5..752f2cf768733 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -433,15 +433,15 @@ message JsonOptions { message TableParquetOptions { ParquetOptions global = 1; - repeated ColumnSpecificOptions column_specific_options = 2; + repeated ParquetColumnSpecificOptions column_specific_options = 2; } -message ColumnSpecificOptions { +message ParquetColumnSpecificOptions { string column_name = 1; - ColumnOptions options = 2; + ParquetColumnOptions options = 2; } -message ColumnOptions { +message ParquetColumnOptions { oneof bloom_filter_enabled_opt { bool bloom_filter_enabled = 1; } diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 5fe9d937f7c4e..21db66a127018 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -33,7 +33,8 @@ use arrow::ipc::{reader::read_record_batch, root_as_message}; use datafusion_common::{ arrow_datafusion_err, config::{ - ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions, + CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions, + TableParquetOptions, }, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, @@ -960,12 +961,12 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { } } -impl TryFrom<&protobuf::ColumnOptions> for ColumnOptions { +impl TryFrom<&protobuf::ColumnOptions> for ParquetColumnOptions { type Error = DataFusionError; fn try_from( value: &protobuf::ColumnOptions, ) -> datafusion_common::Result { - Ok(ColumnOptions { + Ok(ParquetColumnOptions { compression: value.compression_opt.clone().map(|opt| match opt { protobuf::column_options::CompressionOpt::Compression(v) => Some(v), }).unwrap_or(None), @@ -1013,7 +1014,8 @@ impl TryFrom<&protobuf::TableParquetOptions> for TableParquetOptions { fn try_from( value: 
&protobuf::TableParquetOptions, ) -> datafusion_common::Result { - let mut column_specific_options: HashMap = HashMap::new(); + let mut column_specific_options: HashMap = + HashMap::new(); for protobuf::ColumnSpecificOptions { column_name, options: maybe_options, diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index c15da2895b7cf..24083e8b72768 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -30,7 +30,8 @@ use arrow::datatypes::{ use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator}; use datafusion_common::{ config::{ - ColumnOptions, CsvOptions, JsonOptions, ParquetOptions, TableParquetOptions, + CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions, + TableParquetOptions, }, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, @@ -830,10 +831,12 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { } } -impl TryFrom<&ColumnOptions> for protobuf::ColumnOptions { +impl TryFrom<&ParquetColumnOptions> for protobuf::ColumnOptions { type Error = DataFusionError; - fn try_from(value: &ColumnOptions) -> datafusion_common::Result { + fn try_from( + value: &ParquetColumnOptions, + ) -> datafusion_common::Result { Ok(protobuf::ColumnOptions { compression_opt: value .compression diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index bf198a24c811b..b36624e391c2b 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -670,46 +670,50 @@ pub struct TableParquetOptions { #[prost(message, optional, tag = "1")] pub global: ::core::option::Option, #[prost(message, repeated, tag = "2")] - pub column_specific_options: ::prost::alloc::vec::Vec, + pub column_specific_options: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnSpecificOptions { +pub struct ParquetColumnSpecificOptions { #[prost(string, tag = "1")] pub column_name: ::prost::alloc::string::String, #[prost(message, optional, tag = "2")] - pub options: ::core::option::Option, + pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnOptions { - #[prost(oneof = "column_options::BloomFilterEnabledOpt", tags = "1")] +pub struct ParquetColumnOptions { + #[prost(oneof = "parquet_column_options::BloomFilterEnabledOpt", tags = "1")] pub bloom_filter_enabled_opt: ::core::option::Option< - column_options::BloomFilterEnabledOpt, + parquet_column_options::BloomFilterEnabledOpt, >, - #[prost(oneof = "column_options::EncodingOpt", tags = "2")] - pub encoding_opt: ::core::option::Option, - #[prost(oneof = "column_options::DictionaryEnabledOpt", tags = "3")] + #[prost(oneof = "parquet_column_options::EncodingOpt", tags = "2")] + pub encoding_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::DictionaryEnabledOpt", tags = "3")] pub dictionary_enabled_opt: ::core::option::Option< - column_options::DictionaryEnabledOpt, + parquet_column_options::DictionaryEnabledOpt, >, - #[prost(oneof = "column_options::CompressionOpt", tags = "4")] - pub compression_opt: ::core::option::Option, - #[prost(oneof = "column_options::StatisticsEnabledOpt", tags = "5")] + #[prost(oneof = "parquet_column_options::CompressionOpt", 
tags = "4")] + pub compression_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::StatisticsEnabledOpt", tags = "5")] pub statistics_enabled_opt: ::core::option::Option< - column_options::StatisticsEnabledOpt, + parquet_column_options::StatisticsEnabledOpt, >, - #[prost(oneof = "column_options::BloomFilterFppOpt", tags = "6")] - pub bloom_filter_fpp_opt: ::core::option::Option, - #[prost(oneof = "column_options::BloomFilterNdvOpt", tags = "7")] - pub bloom_filter_ndv_opt: ::core::option::Option, - #[prost(oneof = "column_options::MaxStatisticsSizeOpt", tags = "8")] + #[prost(oneof = "parquet_column_options::BloomFilterFppOpt", tags = "6")] + pub bloom_filter_fpp_opt: ::core::option::Option< + parquet_column_options::BloomFilterFppOpt, + >, + #[prost(oneof = "parquet_column_options::BloomFilterNdvOpt", tags = "7")] + pub bloom_filter_ndv_opt: ::core::option::Option< + parquet_column_options::BloomFilterNdvOpt, + >, + #[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")] pub max_statistics_size_opt: ::core::option::Option< - column_options::MaxStatisticsSizeOpt, + parquet_column_options::MaxStatisticsSizeOpt, >, } -/// Nested message and enum types in `ColumnOptions`. -pub mod column_options { +/// Nested message and enum types in `ParquetColumnOptions`. +pub mod parquet_column_options { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum BloomFilterEnabledOpt { diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 7af4c52c654b6..ff7040926caa8 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -358,7 +358,7 @@ OPTIONS ( ) # errors for invalid property (not stating `format.metadata`) -statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ColumnOptions +statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata" not found on ParquetColumnOptions COPY source_table TO 'test_files/scratch/copy/table_with_metadata/' STORED AS PARQUET From ea8c287855ae799573a56697db63e7b16edf3a37 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 29 Jul 2024 12:59:37 +0200 Subject: [PATCH 167/357] Update cache key used in rust CI script (#11641) * Remove misplaced comment The comment is in `linux-build-lib` and refers to `linux-build-lib` as to some other thing. 
* Hash all Cargo.toml files for cargo cache key on CI * Hash all Cargo.lock files for cargo cache key on CI --- .github/workflows/rust.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4a41fd542e5d4..45abeb8f6fe26 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -62,8 +62,7 @@ jobs: ~/.cargo/git/db/ ./target/ ./datafusion-cli/target/ - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache-benchmark-${{ hashFiles('datafusion/**/Cargo.toml', 'benchmarks/Cargo.toml', 'datafusion-cli/Cargo.toml') }} + key: cargo-cache-${{ hashFiles('**/Cargo.toml', '**/Cargo.lock') }} - name: Check datafusion without default features # Some of the test binaries require the parquet feature still From a591301a1fe46899f1bfe66edd9c7741ed726ac3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 29 Jul 2024 16:52:52 -0400 Subject: [PATCH 168/357] Merge `string-view2` branch: reading from parquet up to 2x faster for some ClickBench queries (not on by default) (#11667) * Pin to pre-release version of arrow 52.2.0 * Update for deprecated method * Add a config to force using string view in benchmark (#11514) * add a knob to force string view in benchmark * fix sql logic test * update doc * fix ci * fix ci only test * Update benchmarks/src/util/options.rs Co-authored-by: Andrew Lamb * Update datafusion/common/src/config.rs Co-authored-by: Andrew Lamb * update tests --------- Co-authored-by: Andrew Lamb * Add String view helper functions (#11517) * add functions * add tests for hash util * Add ArrowBytesViewMap and ArrowBytesViewSet (#11515) * Update `string-view` branch to arrow-rs main (#10966) * Pin to arrow main * Fix clippy with latest arrow * Uncomment test that needs new arrow-rs to work * Update datafusion-cli Cargo.lock * Update Cargo.lock * tapelo * merge * update cast * consistent dep * fix ci * add more tests * make doc happy * update new implementation * fix bug * avoid unused dep * update dep * update * fix cargo check * update doc * pick up the comments change again --------- Co-authored-by: Andrew Lamb * Enable `GroupValueBytesView` for aggregation with StringView types (#11519) * add functions * Update `string-view` branch to arrow-rs main (#10966) * Pin to arrow main * Fix clippy with latest arrow * Uncomment test that needs new arrow-rs to work * Update datafusion-cli Cargo.lock * Update Cargo.lock * tapelo * merge * update cast * consistent dep * fix ci * avoid unused dep * update dep * update * fix cargo check * better group value view aggregation * update --------- Co-authored-by: Andrew Lamb * Initial support for regex_replace on `StringViewArray` (#11556) * initial support for string view regex * update tests * Add support for Utf8View for date/temporal codepaths (#11518) * Add StringView support for date_part and make_date funcs * run cargo update in datafusion-cli * cargo fmt --------- Co-authored-by: Andrew Lamb * GC `StringViewArray` in `CoalesceBatchesStream` (#11587) * gc string view when appropriate * make clippy happy * address comments * make doc happy * update style * Add comments and tests for gc_string_view_batch * better herustic * update test * Update datafusion/physical-plan/src/coalesce_batches.rs Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb * [Bug] fix bug in return type inference of `utf8_to_int_type` (#11662) * fix bug in return type inference * update doc * add tests --------- Co-authored-by: Andrew Lamb * Fix clippy * 
Increase ByteViewMap block size to 2MB (#11674) * better default block size * fix related test * Change `--string-view` to only apply to parquet formats (#11663) * use inferenced schema, don't load schema again * move config to parquet-only * update * update * better format * format * update * Implement native support StringView for character length (#11676) * native support for character length * Update datafusion/functions/src/unicode/character_length.rs --------- Co-authored-by: Andrew Lamb * Remove uneeded patches * cargo fmt --------- Co-authored-by: Xiangpeng Hao Co-authored-by: Xiangpeng Hao Co-authored-by: Andrew Duffy --- benchmarks/src/clickbench.rs | 8 +- benchmarks/src/tpch/run.rs | 7 + benchmarks/src/util/options.rs | 5 + datafusion-cli/Cargo.lock | 5 +- datafusion/common/src/cast.rs | 11 + datafusion/common/src/config.rs | 4 + .../common/src/file_options/parquet_writer.rs | 4 + datafusion/common/src/hash_utils.rs | 125 +++- .../core/src/datasource/file_format/mod.rs | 23 + .../src/datasource/file_format/parquet.rs | 13 +- .../core/src/datasource/listing/table.rs | 4 +- .../datasource/physical_plan/parquet/mod.rs | 4 + .../physical_plan/parquet/opener.rs | 25 +- datafusion/expr/src/type_coercion/binary.rs | 26 +- datafusion/functions-aggregate/src/count.rs | 4 + .../functions/src/datetime/date_part.rs | 30 +- .../functions/src/datetime/date_trunc.rs | 25 +- .../functions/src/datetime/make_date.rs | 4 +- .../functions/src/regex/regexpreplace.rs | 217 ++++-- .../functions/src/unicode/character_length.rs | 131 ++-- datafusion/functions/src/utils.rs | 23 +- .../src/aggregate/count_distinct/bytes.rs | 61 ++ .../src/aggregate/count_distinct/mod.rs | 1 + .../physical-expr-common/src/binary_map.rs | 6 + .../src/binary_view_map.rs | 690 ++++++++++++++++++ datafusion/physical-expr-common/src/lib.rs | 1 + .../physical-expr/src/aggregate/min_max.rs | 39 + .../src/aggregates/group_values/bytes_view.rs | 129 ++++ .../src/aggregates/group_values/mod.rs | 33 +- .../physical-plan/src/coalesce_batches.rs | 191 ++++- .../proto/datafusion_common.proto | 1 + datafusion/proto-common/src/from_proto/mod.rs | 2 +- .../proto-common/src/generated/pbjson.rs | 18 + .../proto-common/src/generated/prost.rs | 3 + datafusion/proto-common/src/to_proto/mod.rs | 1 + .../src/generated/datafusion_proto_common.rs | 3 + .../engines/datafusion_engine/normalize.rs | 5 + .../test_files/information_schema.slt | 2 + .../sqllogictest/test_files/string_view.slt | 21 + docs/source/user-guide/configs.md | 1 + 40 files changed, 1714 insertions(+), 192 deletions(-) create mode 100644 datafusion/physical-expr-common/src/binary_view_map.rs create mode 100644 datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 41dffc55f371e..a0f051d176234 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -116,7 +116,13 @@ impl RunOpt { None => queries.min_query_id()..=queries.max_query_id(), }; - let config = self.common.config(); + let mut config = self.common.config(); + config + .options_mut() + .execution + .parquet + .schema_force_string_view = self.common.string_view; + let ctx = SessionContext::new_with_config(config); self.register_hits(&ctx).await?; diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index f2a93d2ea5495..a72dfaa0f58ca 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -120,6 +120,11 @@ impl RunOpt { .config() 
.with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; + config + .options_mut() + .execution + .parquet + .schema_force_string_view = self.common.string_view; let ctx = SessionContext::new_with_config(config); // register tables @@ -339,6 +344,7 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, + string_view: false, }; let opt = RunOpt { query: Some(query), @@ -372,6 +378,7 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, + string_view: false, }; let opt = RunOpt { query: Some(query), diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs index b9398e5b522f2..02591e293272e 100644 --- a/benchmarks/src/util/options.rs +++ b/benchmarks/src/util/options.rs @@ -37,6 +37,11 @@ pub struct CommonOpt { /// Activate debug mode to see more details #[structopt(short, long)] pub debug: bool, + + /// If true, will use StringView/BinaryViewArray instead of String/BinaryArray + /// when reading ParquetFiles + #[structopt(long)] + pub string_view: bool, } impl CommonOpt { diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e2851cfb40576..5884e424c781c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3356,11 +3356,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index 0dc0532bbb6fb..0586fcf5e2ae9 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -36,6 +36,7 @@ use arrow::{ }, datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType}, }; +use arrow_array::{BinaryViewArray, StringViewArray}; // Downcast ArrayRef to Date32Array pub fn as_date32_array(array: &dyn Array) -> Result<&Date32Array> { @@ -87,6 +88,11 @@ pub fn as_string_array(array: &dyn Array) -> Result<&StringArray> { Ok(downcast_value!(array, StringArray)) } +// Downcast ArrayRef to StringViewArray +pub fn as_string_view_array(array: &dyn Array) -> Result<&StringViewArray> { + Ok(downcast_value!(array, StringViewArray)) +} + // Downcast ArrayRef to UInt32Array pub fn as_uint32_array(array: &dyn Array) -> Result<&UInt32Array> { Ok(downcast_value!(array, UInt32Array)) @@ -221,6 +227,11 @@ pub fn as_binary_array(array: &dyn Array) -> Result<&BinaryArray> { Ok(downcast_value!(array, BinaryArray)) } +// Downcast ArrayRef to BinaryViewArray +pub fn as_binary_view_array(array: &dyn Array) -> Result<&BinaryViewArray> { + Ok(downcast_value!(array, BinaryViewArray)) +} + // Downcast ArrayRef to FixedSizeListArray pub fn as_fixed_size_list_array(array: &dyn Array) -> Result<&FixedSizeListArray> { Ok(downcast_value!(array, FixedSizeListArray)) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2b932b26cad68..9f8aa1cbdcaae 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -469,6 +469,10 @@ config_namespace! { /// writing out already in-memory data, such as from a cached /// data frame. 
pub maximum_buffered_record_batches_per_stream: usize, default = 2 + + /// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, + /// and `Binary/BinaryLarge` with `BinaryView`. + pub schema_force_string_view: bool, default = false } } diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 80b751858398d..4a229fe01b54f 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -175,6 +175,7 @@ impl ParquetOptions { maximum_parallel_row_group_writers: _, maximum_buffered_record_batches_per_stream: _, bloom_filter_on_read: _, // reads not used for writer props + schema_force_string_view: _, } = self; let mut builder = WriterProperties::builder() @@ -440,6 +441,7 @@ mod tests { maximum_buffered_record_batches_per_stream: defaults .maximum_buffered_record_batches_per_stream, bloom_filter_on_read: defaults.bloom_filter_on_read, + schema_force_string_view: defaults.schema_force_string_view, } } @@ -540,6 +542,8 @@ mod tests { maximum_buffered_record_batches_per_stream: global_options_defaults .maximum_buffered_record_batches_per_stream, bloom_filter_on_read: global_options_defaults.bloom_filter_on_read, + schema_force_string_view: global_options_defaults + .schema_force_string_view, }, column_specific_options, key_value_metadata, diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 5e1324e80702c..f57ec0152e3fd 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -31,9 +31,9 @@ use arrow_buffer::IntervalMonthDayNano; #[cfg(not(feature = "force_hash_collisions"))] use crate::cast::{ - as_boolean_array, as_fixed_size_list_array, as_generic_binary_array, - as_large_list_array, as_list_array, as_map_array, as_primitive_array, - as_string_array, as_struct_array, + as_binary_view_array, as_boolean_array, as_fixed_size_list_array, + as_generic_binary_array, as_large_list_array, as_list_array, as_map_array, + as_primitive_array, as_string_array, as_string_view_array, as_struct_array, }; use crate::error::Result; #[cfg(not(feature = "force_hash_collisions"))] @@ -415,8 +415,10 @@ pub fn create_hashes<'a>( DataType::Null => hash_null(random_state, hashes_buffer, rehash), DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash), DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash), DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash), DataType::Binary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), + DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash), DataType::LargeBinary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), DataType::FixedSizeBinary(_) => { let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap(); @@ -540,22 +542,57 @@ mod tests { Ok(()) } - #[test] - fn create_hashes_binary() -> Result<()> { - let byte_array = Arc::new(BinaryArray::from_vec(vec![ - &[4, 3, 2], - &[4, 3, 2], - &[1, 2, 3], - ])); + macro_rules! 
create_hash_binary { + ($NAME:ident, $ARRAY:ty) => { + #[cfg(not(feature = "force_hash_collisions"))] + #[test] + fn $NAME() { + let binary = [ + Some(b"short".to_byte_slice()), + None, + Some(b"long but different 12 bytes string"), + Some(b"short2"), + Some(b"Longer than 12 bytes string"), + Some(b"short"), + Some(b"Longer than 12 bytes string"), + ]; + + let binary_array = Arc::new(binary.iter().cloned().collect::<$ARRAY>()); + let ref_array = Arc::new(binary.iter().cloned().collect::()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut binary_hashes = vec![0; binary.len()]; + create_hashes(&[binary_array], &random_state, &mut binary_hashes) + .unwrap(); + + let mut ref_hashes = vec![0; binary.len()]; + create_hashes(&[ref_array], &random_state, &mut ref_hashes).unwrap(); + + // Null values result in a zero hash, + for (val, hash) in binary.iter().zip(binary_hashes.iter()) { + match val { + Some(_) => assert_ne!(*hash, 0), + None => assert_eq!(*hash, 0), + } + } - let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; byte_array.len()]; - let hashes = create_hashes(&[byte_array], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 3,); + // same logical values should hash to the same hash value + assert_eq!(binary_hashes, ref_hashes); - Ok(()) + // Same values should map to same hash values + assert_eq!(binary[0], binary[5]); + assert_eq!(binary[4], binary[6]); + + // different binary should map to different hash values + assert_ne!(binary[0], binary[2]); + } + }; } + create_hash_binary!(binary_array, BinaryArray); + create_hash_binary!(binary_view_array, BinaryViewArray); + #[test] fn create_hashes_fixed_size_binary() -> Result<()> { let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]]; @@ -571,6 +608,64 @@ mod tests { Ok(()) } + macro_rules! 
create_hash_string { + ($NAME:ident, $ARRAY:ty) => { + #[cfg(not(feature = "force_hash_collisions"))] + #[test] + fn $NAME() { + let strings = [ + Some("short"), + None, + Some("long but different 12 bytes string"), + Some("short2"), + Some("Longer than 12 bytes string"), + Some("short"), + Some("Longer than 12 bytes string"), + ]; + + let string_array = Arc::new(strings.iter().cloned().collect::<$ARRAY>()); + let dict_array = Arc::new( + strings + .iter() + .cloned() + .collect::>(), + ); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut string_hashes = vec![0; strings.len()]; + create_hashes(&[string_array], &random_state, &mut string_hashes) + .unwrap(); + + let mut dict_hashes = vec![0; strings.len()]; + create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap(); + + // Null values result in a zero hash, + for (val, hash) in strings.iter().zip(string_hashes.iter()) { + match val { + Some(_) => assert_ne!(*hash, 0), + None => assert_eq!(*hash, 0), + } + } + + // same logical values should hash to the same hash value + assert_eq!(string_hashes, dict_hashes); + + // Same values should map to same hash values + assert_eq!(strings[0], strings[5]); + assert_eq!(strings[4], strings[6]); + + // different strings should map to different hash values + assert_ne!(strings[0], strings[2]); + } + }; + } + + create_hash_string!(string_array, StringArray); + create_hash_string!(large_string_array, LargeStringArray); + create_hash_string!(string_view_array, StringArray); + create_hash_string!(dict_string_array, DictionaryArray); + #[test] // Tests actual values of hashes, which are different if forcing collisions #[cfg(not(feature = "force_hash_collisions"))] diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 7154b50b9dd9e..a324a45784247 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -42,6 +42,7 @@ use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::{ExecutionPlan, Statistics}; +use arrow_schema::{DataType, Field, Schema}; use datafusion_common::file_options::file_type::FileType; use datafusion_common::{internal_err, not_impl_err, GetExt}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; @@ -204,6 +205,28 @@ pub fn file_type_to_format( } } +/// Transform a schema to use view types for Utf8 and Binary +pub fn transform_schema_to_view(schema: &Schema) -> Schema { + let transformed_fields: Vec> = schema + .fields + .iter() + .map(|field| match field.data_type() { + DataType::Utf8 | DataType::LargeUtf8 => Arc::new(Field::new( + field.name(), + DataType::Utf8View, + field.is_nullable(), + )), + DataType::Binary | DataType::LargeBinary => Arc::new(Field::new( + field.name(), + DataType::BinaryView, + field.is_nullable(), + )), + _ => field.clone(), + }) + .collect(); + Schema::new_with_metadata(transformed_fields, schema.metadata.clone()) +} + #[cfg(test)] pub(crate) mod test_util { use std::ops::Range; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index b50e9389ad9ec..8a1cd2a147c73 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use super::write::demux::start_demuxer_task; use super::write::{create_writer, SharedBuffer}; -use super::{FileFormat, FileFormatFactory, 
FileScanConfig}; +use super::{transform_schema_to_view, FileFormat, FileFormatFactory, FileScanConfig}; use crate::arrow::array::RecordBatch; use crate::arrow::datatypes::{Fields, Schema, SchemaRef}; use crate::datasource::file_format::file_compression_type::FileCompressionType; @@ -316,6 +316,17 @@ impl FileFormat for ParquetFormat { Schema::try_merge(schemas) }?; + let schema = if state + .config_options() + .execution + .parquet + .schema_force_string_view + { + transform_schema_to_view(&schema) + } else { + schema + }; + Ok(Arc::new(schema)) } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3af4d41bcf037..72c6e0d84c049 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -410,7 +410,9 @@ impl ListingOptions { .try_collect() .await?; - self.format.infer_schema(state, &store, &files).await + let schema = self.format.infer_schema(state, &store, &files).await?; + + Ok(schema) } /// Infers the partition columns stored in `LOCATION` and compares diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index a5a7b50a008a6..ed71d871b3fd3 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -711,6 +711,10 @@ impl ExecutionPlan for ParquetExec { enable_page_index: self.enable_page_index(), enable_bloom_filter: self.bloom_filter_on_read(), schema_adapter_factory, + schema_force_string_view: self + .table_parquet_options + .global + .schema_force_string_view, }; let stream = diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index ffe879eb8de00..4edc0ac525de6 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -17,6 +17,7 @@ //! 
[`ParquetOpener`] for opening Parquet files +use crate::datasource::file_format::transform_schema_to_view; use crate::datasource::physical_plan::parquet::page_filter::PagePruningAccessPlanFilter; use crate::datasource::physical_plan::parquet::row_group_filter::RowGroupAccessPlanFilter; use crate::datasource::physical_plan::parquet::{ @@ -33,7 +34,7 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use futures::{StreamExt, TryStreamExt}; use log::debug; -use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask}; use std::sync::Arc; @@ -56,6 +57,7 @@ pub(super) struct ParquetOpener { pub enable_page_index: bool, pub enable_bloom_filter: bool, pub schema_adapter_factory: Arc, + pub schema_force_string_view: bool, } impl FileOpener for ParquetOpener { @@ -66,7 +68,7 @@ impl FileOpener for ParquetOpener { let file_metrics = ParquetFileMetrics::new(self.partition_index, &file_name, &self.metrics); - let reader: Box = + let mut reader: Box = self.parquet_file_reader_factory.create_reader( self.partition_index, file_meta, @@ -90,12 +92,27 @@ impl FileOpener for ParquetOpener { ); let enable_bloom_filter = self.enable_bloom_filter; let limit = self.limit; + let schema_force_string_view = self.schema_force_string_view; Ok(Box::pin(async move { let options = ArrowReaderOptions::new().with_page_index(enable_page_index); + + let metadata = + ArrowReaderMetadata::load_async(&mut reader, options.clone()).await?; + let mut schema = metadata.schema().clone(); + + if schema_force_string_view { + schema = Arc::new(transform_schema_to_view(&schema)); + } + + let options = ArrowReaderOptions::new() + .with_page_index(enable_page_index) + .with_schema(schema.clone()); + let metadata = + ArrowReaderMetadata::try_new(metadata.metadata().clone(), options)?; + let mut builder = - ParquetRecordBatchStreamBuilder::new_with_options(reader, options) - .await?; + ParquetRecordBatchStreamBuilder::new_with_metadata(reader, metadata); let file_schema = builder.schema().clone(); diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index ae5bdc88b115f..a657f4df0e3d5 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -527,7 +527,7 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { match (l, r) { - // Coerce Utf8/LargeUtf8 to Date32/Date64/Time32/Time64/Timestamp - (Utf8, temporal) | (LargeUtf8, temporal) => match temporal { - Date32 | Date64 => Some(temporal.clone()), - Time32(_) | Time64(_) => { - if is_time_with_valid_unit(temporal.to_owned()) { - Some(temporal.to_owned()) - } else { - None + // Coerce Utf8View/Utf8/LargeUtf8 to Date32/Date64/Time32/Time64/Timestamp + (Utf8, temporal) | (LargeUtf8, temporal) | (Utf8View, temporal) => { + match temporal { + Date32 | Date64 => Some(temporal.clone()), + Time32(_) | Time64(_) => { + if is_time_with_valid_unit(temporal.to_owned()) { + Some(temporal.to_owned()) + } else { + None + } } + Timestamp(_, tz) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), + _ => None, } - Timestamp(_, tz) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), - _ => None, - }, + } _ => None, } } diff --git a/datafusion/functions-aggregate/src/count.rs 
b/datafusion/functions-aggregate/src/count.rs index 56850d0e02a16..69eac84f890d3 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -16,6 +16,7 @@ // under the License. use ahash::RandomState; +use datafusion_physical_expr_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator; use std::collections::HashSet; use std::ops::BitAnd; use std::{fmt::Debug, sync::Arc}; @@ -235,6 +236,9 @@ impl AggregateUDFImpl for Count { DataType::Utf8 => { Box::new(BytesDistinctCountAccumulator::::new(OutputType::Utf8)) } + DataType::Utf8View => { + Box::new(BytesViewDistinctCountAccumulator::new(OutputType::Utf8)) + } DataType::LargeUtf8 => { Box::new(BytesDistinctCountAccumulator::::new(OutputType::Utf8)) } diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index e1efb4811ec0d..e24b11aeb71ff 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, Float64Array}; use arrow::compute::{binary, cast, date_part, DatePart}; use arrow::datatypes::DataType::{ - Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, + Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View, }; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{DataType, TimeUnit}; @@ -56,31 +56,57 @@ impl DatePartFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Timestamp(Nanosecond, None)]), + Exact(vec![Utf8View, Timestamp(Nanosecond, None)]), Exact(vec![ Utf8, Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Millisecond, None)]), + Exact(vec![Utf8View, Timestamp(Millisecond, None)]), Exact(vec![ Utf8, Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Microsecond, None)]), + Exact(vec![Utf8View, Timestamp(Microsecond, None)]), Exact(vec![ Utf8, Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Second, None)]), + Exact(vec![Utf8View, Timestamp(Second, None)]), Exact(vec![ Utf8, Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Date64]), + Exact(vec![Utf8View, Date64]), Exact(vec![Utf8, Date32]), + Exact(vec![Utf8View, Date32]), Exact(vec![Utf8, Time32(Second)]), + Exact(vec![Utf8View, Time32(Second)]), Exact(vec![Utf8, Time32(Millisecond)]), + Exact(vec![Utf8View, Time32(Millisecond)]), Exact(vec![Utf8, Time64(Microsecond)]), + Exact(vec![Utf8View, Time64(Microsecond)]), Exact(vec![Utf8, Time64(Nanosecond)]), + Exact(vec![Utf8View, Time64(Nanosecond)]), ], Volatility::Immutable, ), @@ -114,6 +140,8 @@ impl ScalarUDFImpl for DatePartFunc { let part = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = part { v + } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = part { + v } else { return exec_err!( "First argument of `DATE_PART` must be non-null scalar Utf8" diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 6b52507a9c6f0..308ea668d3d7b 100644 --- 
a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -29,7 +29,7 @@ use arrow::array::types::{ TimestampNanosecondType, TimestampSecondType, }; use arrow::array::{Array, PrimitiveArray}; -use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8}; +use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View}; use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; @@ -61,25 +61,45 @@ impl DateTruncFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Timestamp(Nanosecond, None)]), + Exact(vec![Utf8View, Timestamp(Nanosecond, None)]), Exact(vec![ Utf8, Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Microsecond, None)]), + Exact(vec![Utf8View, Timestamp(Microsecond, None)]), Exact(vec![ Utf8, Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Millisecond, None)]), + Exact(vec![Utf8View, Timestamp(Millisecond, None)]), Exact(vec![ Utf8, Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Second, None)]), + Exact(vec![Utf8View, Timestamp(Second, None)]), Exact(vec![ Utf8, Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), + ]), ], Volatility::Immutable, ), @@ -119,6 +139,9 @@ impl ScalarUDFImpl for DateTruncFunc { let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = granularity + { + v.to_lowercase() + } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity { v.to_lowercase() } else { diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index 6aa72572bc4d5..ded7b454f9eb8 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -23,7 +23,7 @@ use arrow::array::cast::AsArray; use arrow::array::types::{Date32Type, Int32Type}; use arrow::array::PrimitiveArray; use arrow::datatypes::DataType; -use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8}; +use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf8View}; use chrono::prelude::*; use datafusion_common::{exec_err, Result, ScalarValue}; @@ -45,7 +45,7 @@ impl MakeDateFunc { Self { signature: Signature::uniform( 3, - vec![Int32, Int64, UInt32, UInt64, Utf8], + vec![Int32, Int64, UInt32, UInt64, Utf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index d820f991be18e..d28c6cd36d65f 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -17,11 +17,14 @@ //! 
Regx expressions use arrow::array::new_null_array; +use arrow::array::ArrayAccessor; use arrow::array::ArrayDataBuilder; use arrow::array::BufferBuilder; use arrow::array::GenericStringArray; +use arrow::array::StringViewBuilder; use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; use arrow::datatypes::DataType; +use datafusion_common::cast::as_string_view_array; use datafusion_common::exec_err; use datafusion_common::plan_err; use datafusion_common::ScalarValue; @@ -54,6 +57,7 @@ impl RegexpReplaceFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Utf8, Utf8]), + Exact(vec![Utf8View, Utf8, Utf8]), Exact(vec![Utf8, Utf8, Utf8, Utf8]), ], Volatility::Immutable, @@ -80,6 +84,7 @@ impl ScalarUDFImpl for RegexpReplaceFunc { Ok(match &arg_types[0] { LargeUtf8 | LargeBinary => LargeUtf8, Utf8 | Binary => Utf8, + Utf8View | BinaryView => Utf8View, Null => Null, Dictionary(_, t) => match **t { LargeUtf8 | LargeBinary => LargeUtf8, @@ -118,15 +123,18 @@ impl ScalarUDFImpl for RegexpReplaceFunc { } } } + fn regexp_replace_func(args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Utf8 => specialize_regexp_replace::(args), DataType::LargeUtf8 => specialize_regexp_replace::(args), + DataType::Utf8View => specialize_regexp_replace::(args), other => { internal_err!("Unsupported data type {other:?} for function regexp_replace") } } } + /// replace POSIX capture groups (like \1) with Rust Regex group (like ${1}) /// used by regexp_replace fn regex_replace_posix_groups(replacement: &str) -> String { @@ -280,8 +288,8 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result } } -fn _regexp_replace_early_abort( - input_array: &GenericStringArray, +fn _regexp_replace_early_abort( + input_array: T, sz: usize, ) -> Result { // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments @@ -290,13 +298,14 @@ fn _regexp_replace_early_abort( // Also acts like an early abort mechanism when the input array is empty. Ok(new_null_array(input_array.data_type(), sz)) } + /// Get the first argument from the given string array. /// /// Note: If the array is empty or the first argument is null, /// then calls the given early abort function. macro_rules! fetch_string_arg { ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ - let array = as_generic_string_array::($ARG)?; + let array = as_generic_string_array::<$T>($ARG)?; if array.len() == 0 || array.is_null(0) { return $EARLY_ABORT(array, $ARRAY_SIZE); } else { @@ -313,25 +322,24 @@ macro_rules! fetch_string_arg { fn _regexp_replace_static_pattern_replace( args: &[ArrayRef], ) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let array_size = string_array.len(); + let array_size = args[0].len(); let pattern = fetch_string_arg!( &args[1], "pattern", - T, + i32, _regexp_replace_early_abort, array_size ); let replacement = fetch_string_arg!( &args[2], "replacement", - T, + i32, _regexp_replace_early_abort, array_size ); let flags = match args.len() { 3 => None, - 4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort, array_size)), + 4 => Some(fetch_string_arg!(&args[3], "flags", i32, _regexp_replace_early_abort, array_size)), other => { return exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." @@ -358,32 +366,61 @@ fn _regexp_replace_static_pattern_replace( // with rust ones. 
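// [Editorial sketch, not part of the patch] `regex_replace_posix_groups`, called on the
// next line, is documented earlier in this file as rewriting POSIX capture group
// references such as `\1` into the Rust regex replacement form `${1}`. A minimal
// standalone version of that rewrite, assuming only the `regex` crate (the helper name
// `posix_groups_to_rust` here is hypothetical, not DataFusion's own), could look like:
fn posix_groups_to_rust(replacement: &str) -> String {
    // In the `regex` crate's replacement syntax, `$$` emits a literal `$` and `$2`
    // re-inserts the captured digits, so `\1` becomes `${1}` for `Regex::replacen`.
    regex::Regex::new(r"(\\)(\d*)")
        .expect("hard-coded pattern is valid")
        .replace_all(replacement, "$${$2}")
        .into_owned()
}
// For example, posix_groups_to_rust(r"gr\1oup") returns "gr${1}oup".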
let replacement = regex_replace_posix_groups(replacement); - // We are going to create the underlying string buffer from its parts - // to be able to re-use the existing null buffer for sparse arrays. - let mut vals = BufferBuilder::::new({ - let offsets = string_array.value_offsets(); - (offsets[string_array.len()] - offsets[0]) - .to_usize() - .expect("Failed to convert usize") - }); - let mut new_offsets = BufferBuilder::::new(string_array.len() + 1); - new_offsets.append(T::zero()); - - string_array.iter().for_each(|val| { - if let Some(val) = val { - let result = re.replacen(val, limit, replacement.as_str()); - vals.append_slice(result.as_bytes()); + let string_array_type = args[0].data_type(); + match string_array_type { + DataType::Utf8 | DataType::LargeUtf8 => { + let string_array = as_generic_string_array::(&args[0])?; + + // We are going to create the underlying string buffer from its parts + // to be able to re-use the existing null buffer for sparse arrays. + let mut vals = BufferBuilder::::new({ + let offsets = string_array.value_offsets(); + (offsets[string_array.len()] - offsets[0]) + .to_usize() + .unwrap() + }); + let mut new_offsets = BufferBuilder::::new(string_array.len() + 1); + new_offsets.append(T::zero()); + + string_array.iter().for_each(|val| { + if let Some(val) = val { + let result = re.replacen(val, limit, replacement.as_str()); + vals.append_slice(result.as_bytes()); + } + new_offsets.append(T::from_usize(vals.len()).unwrap()); + }); + + let data = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) + .len(string_array.len()) + .nulls(string_array.nulls().cloned()) + .buffers(vec![new_offsets.finish(), vals.finish()]) + .build()?; + let result_array = GenericStringArray::::from(data); + Ok(Arc::new(result_array) as ArrayRef) } - new_offsets.append(T::from_usize(vals.len()).unwrap()); - }); - - let data = ArrayDataBuilder::new(GenericStringArray::::DATA_TYPE) - .len(string_array.len()) - .nulls(string_array.nulls().cloned()) - .buffers(vec![new_offsets.finish(), vals.finish()]) - .build()?; - let result_array = GenericStringArray::::from(data); - Ok(Arc::new(result_array) as ArrayRef) + DataType::Utf8View => { + let string_view_array = as_string_view_array(&args[0])?; + + let mut builder = StringViewBuilder::with_capacity(string_view_array.len()) + .with_block_size(1024 * 1024 * 2); + + for val in string_view_array.iter() { + if let Some(val) = val { + let result = re.replacen(val, limit, replacement.as_str()); + builder.append_value(result); + } else { + builder.append_null(); + } + } + + let result = builder.finish(); + Ok(Arc::new(result) as ArrayRef) + } + _ => unreachable!( + "Invalid data type for regexp_replace: {}", + string_array_type + ), + } } /// Determine which implementation of the regexp_replace to use based @@ -469,43 +506,91 @@ mod tests { use super::*; - #[test] - fn test_static_pattern_regexp_replace() { - let values = StringArray::from(vec!["abc"; 5]); - let patterns = StringArray::from(vec!["b"; 5]); - let replacements = StringArray::from(vec!["foo"; 5]); - let expected = StringArray::from(vec!["afooc"; 5]); - - let re = _regexp_replace_static_pattern_replace::(&[ - Arc::new(values), - Arc::new(patterns), - Arc::new(replacements), - ]) - .unwrap(); - - assert_eq!(re.as_ref(), &expected); + macro_rules! 
static_pattern_regexp_replace { + ($name:ident, $T:ty, $O:ty) => { + #[test] + fn $name() { + let values = vec!["abc", "acd", "abcd1234567890123", "123456789012abc"]; + let patterns = vec!["b"; 4]; + let replacement = vec!["foo"; 4]; + let expected = + vec!["afooc", "acd", "afoocd1234567890123", "123456789012afooc"]; + + let values = <$T>::from(values); + let patterns = StringArray::from(patterns); + let replacements = StringArray::from(replacement); + let expected = <$T>::from(expected); + + let re = _regexp_replace_static_pattern_replace::<$O>(&[ + Arc::new(values), + Arc::new(patterns), + Arc::new(replacements), + ]) + .unwrap(); + + assert_eq!(re.as_ref(), &expected); + } + }; } - #[test] - fn test_static_pattern_regexp_replace_with_flags() { - let values = StringArray::from(vec!["abc", "ABC", "aBc", "AbC", "aBC"]); - let patterns = StringArray::from(vec!["b"; 5]); - let replacements = StringArray::from(vec!["foo"; 5]); - let flags = StringArray::from(vec!["i"; 5]); - let expected = - StringArray::from(vec!["afooc", "AfooC", "afooc", "AfooC", "afooC"]); - - let re = _regexp_replace_static_pattern_replace::(&[ - Arc::new(values), - Arc::new(patterns), - Arc::new(replacements), - Arc::new(flags), - ]) - .unwrap(); - - assert_eq!(re.as_ref(), &expected); + static_pattern_regexp_replace!(string_array, StringArray, i32); + static_pattern_regexp_replace!(string_view_array, StringViewArray, i32); + static_pattern_regexp_replace!(large_string_array, LargeStringArray, i64); + + macro_rules! static_pattern_regexp_replace_with_flags { + ($name:ident, $T:ty, $O: ty) => { + #[test] + fn $name() { + let values = vec![ + "abc", + "aBc", + "acd", + "abcd1234567890123", + "aBcd1234567890123", + "123456789012abc", + "123456789012aBc", + ]; + let expected = vec![ + "afooc", + "afooc", + "acd", + "afoocd1234567890123", + "afoocd1234567890123", + "123456789012afooc", + "123456789012afooc", + ]; + + let values = <$T>::from(values); + let patterns = StringArray::from(vec!["b"; 7]); + let replacements = StringArray::from(vec!["foo"; 7]); + let flags = StringArray::from(vec!["i"; 5]); + let expected = <$T>::from(expected); + + let re = _regexp_replace_static_pattern_replace::<$O>(&[ + Arc::new(values), + Arc::new(patterns), + Arc::new(replacements), + Arc::new(flags), + ]) + .unwrap(); + + assert_eq!(re.as_ref(), &expected); + } + }; } + static_pattern_regexp_replace_with_flags!(string_array_with_flags, StringArray, i32); + static_pattern_regexp_replace_with_flags!( + string_view_array_with_flags, + StringViewArray, + i32 + ); + static_pattern_regexp_replace_with_flags!( + large_string_array_with_flags, + LargeStringArray, + i64 + ); + #[test] fn test_static_pattern_regexp_replace_early_abort() { let values = StringArray::from(vec!["abc"; 5]); diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index 4f32f4c177766..cee1a57bc6d9d 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -17,11 +17,10 @@ use crate::utils::{make_scalar_function, utf8_to_int_type}; use arrow::array::{ - ArrayRef, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + Array, ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, + OffsetSizeTrait, PrimitiveArray, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::cast::as_generic_string_array; -use datafusion_common::exec_err; use datafusion_common::Result; use 
datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -71,17 +70,7 @@ impl ScalarUDFImpl for CharacterLengthFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(character_length::, vec![])(args) - } - DataType::LargeUtf8 => { - make_scalar_function(character_length::, vec![])(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function character_length") - } - } + make_scalar_function(character_length, vec![])(args) } fn aliases(&self) -> &[String] { @@ -92,15 +81,32 @@ impl ScalarUDFImpl for CharacterLengthFunc { /// Returns number of characters in the string. /// character_length('josé') = 4 /// The implementation counts UTF-8 code points to count the number of characters -fn character_length(args: &[ArrayRef]) -> Result +fn character_length(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + character_length_general::(string_array) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + character_length_general::(string_array) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + character_length_general::(string_array) + } + _ => unreachable!(), + } +} + +fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor>( + array: V, +) -> Result where T::Native: OffsetSizeTrait, { - let string_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; - - let result = string_array - .iter() + let iter = ArrayIter::new(array); + let result = iter .map(|string| { string.map(|string: &str| { T::Native::from_usize(string.chars().count()) @@ -116,55 +122,54 @@ where mod tests { use crate::unicode::character_length::CharacterLengthFunc; use crate::utils::test::test_function; - use arrow::array::{Array, Int32Array}; - use arrow::datatypes::DataType::Int32; + use arrow::array::{Array, Int32Array, Int64Array}; + use arrow::datatypes::DataType::{Int32, Int64}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + macro_rules! 
test_character_length { + ($INPUT:expr, $EXPECTED:expr) => { + test_function!( + CharacterLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + + test_function!( + CharacterLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))], + $EXPECTED, + i64, + Int64, + Int64Array + ); + + test_function!( + CharacterLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + }; + } + #[test] fn test_functions() -> Result<()> { #[cfg(feature = "unicode_expressions")] - test_function!( - CharacterLengthFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(Some( - String::from("chars") - )))], - Ok(Some(5)), - i32, - Int32, - Int32Array - ); - #[cfg(feature = "unicode_expressions")] - test_function!( - CharacterLengthFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(Some( - String::from("josé") - )))], - Ok(Some(4)), - i32, - Int32, - Int32Array - ); - #[cfg(feature = "unicode_expressions")] - test_function!( - CharacterLengthFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(Some( - String::from("") - )))], - Ok(Some(0)), - i32, - Int32, - Int32Array - ); - #[cfg(feature = "unicode_expressions")] - test_function!( - CharacterLengthFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(None))], - Ok(None), - i32, - Int32, - Int32Array - ); + { + test_character_length!(Some(String::from("chars")), Ok(Some(5))); + test_character_length!(Some(String::from("josé")), Ok(Some(4))); + // test long strings (more than 12 bytes for StringView) + test_character_length!(Some(String::from("joséjoséjoséjosé")), Ok(Some(16))); + test_character_length!(Some(String::from("")), Ok(Some(0))); + test_character_length!(None, Ok(None)); + } + #[cfg(not(feature = "unicode_expressions"))] test_function!( CharacterLengthFunc::new(), diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 393dcc456a889..7b367174006d4 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use arrow::array::ArrayRef; use arrow::datatypes::DataType; + use datafusion_common::{Result, ScalarValue}; use datafusion_expr::function::Hint; use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; -use std::sync::Arc; /// Creates a function to identify the optimal return type of a string function given /// the type of its first argument. @@ -29,6 +31,8 @@ use std::sync::Arc; /// `$largeUtf8Type`, /// /// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`, +/// +/// If the input type is `Utf8View` the return type is $utf8Type, macro_rules! get_optimal_return_type { ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => { pub(crate) fn $FUNC(arg_type: &DataType, name: &str) -> Result { @@ -37,6 +41,8 @@ macro_rules! 
get_optimal_return_type { DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, // Binary inputs are automatically coerced to Utf8 DataType::Utf8 | DataType::Binary => $utf8Type, + // Utf8View max offset size is u32::MAX, the same as UTF8 + DataType::Utf8View | DataType::BinaryView => $utf8Type, DataType::Null => DataType::Null, DataType::Dictionary(_, value_type) => match **value_type { DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, @@ -177,6 +183,21 @@ pub mod test { }; } + use arrow::datatypes::DataType; #[allow(unused_imports)] pub(crate) use test_function; + + use super::*; + + #[test] + fn string_to_int_type() { + let v = utf8_to_int_type(&DataType::Utf8, "test").unwrap(); + assert_eq!(v, DataType::Int32); + + let v = utf8_to_int_type(&DataType::Utf8View, "test").unwrap(); + assert_eq!(v, DataType::Int32); + + let v = utf8_to_int_type(&DataType::LargeUtf8, "test").unwrap(); + assert_eq!(v, DataType::Int64); + } } diff --git a/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs b/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs index 27094b0c819a7..360d64ce01419 100644 --- a/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs @@ -18,6 +18,7 @@ //! [`BytesDistinctCountAccumulator`] for Utf8/LargeUtf8/Binary/LargeBinary values use crate::binary_map::{ArrowBytesSet, OutputType}; +use crate::binary_view_map::ArrowBytesViewSet; use arrow::array::{ArrayRef, OffsetSizeTrait}; use datafusion_common::cast::as_list_array; use datafusion_common::utils::array_into_list_array_nullable; @@ -88,3 +89,63 @@ impl Accumulator for BytesDistinctCountAccumulator { std::mem::size_of_val(self) + self.0.size() } } + +/// Specialized implementation of +/// `COUNT DISTINCT` for [`StringViewArray`] and [`BinaryViewArray`]. 
+/// +/// [`StringViewArray`]: arrow::array::StringViewArray +/// [`BinaryViewArray`]: arrow::array::BinaryViewArray +#[derive(Debug)] +pub struct BytesViewDistinctCountAccumulator(ArrowBytesViewSet); + +impl BytesViewDistinctCountAccumulator { + pub fn new(output_type: OutputType) -> Self { + Self(ArrowBytesViewSet::new(output_type)) + } +} + +impl Accumulator for BytesViewDistinctCountAccumulator { + fn state(&mut self) -> datafusion_common::Result> { + let set = self.0.take(); + let arr = set.into_state(); + let list = Arc::new(array_into_list_array_nullable(arr)); + Ok(vec![ScalarValue::List(list)]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { + if values.is_empty() { + return Ok(()); + } + + self.0.insert(&values[0]); + + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> datafusion_common::Result<()> { + if states.is_empty() { + return Ok(()); + } + assert_eq!( + states.len(), + 1, + "count_distinct states must be single array" + ); + + let arr = as_list_array(&states[0])?; + arr.iter().try_for_each(|maybe_list| { + if let Some(list) = maybe_list { + self.0.insert(&list); + }; + Ok(()) + }) + } + + fn evaluate(&mut self) -> datafusion_common::Result { + Ok(ScalarValue::Int64(Some(self.0.non_null_len() as i64))) + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + self.0.size() + } +} diff --git a/datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs b/datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs index f216406d0dd74..7d772f7c649dc 100644 --- a/datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs @@ -19,5 +19,6 @@ mod bytes; mod native; pub use bytes::BytesDistinctCountAccumulator; +pub use bytes::BytesViewDistinctCountAccumulator; pub use native::FloatDistinctCountAccumulator; pub use native::PrimitiveDistinctCountAccumulator; diff --git a/datafusion/physical-expr-common/src/binary_map.rs b/datafusion/physical-expr-common/src/binary_map.rs index a5da05d2a5351..edf608a2054f3 100644 --- a/datafusion/physical-expr-common/src/binary_map.rs +++ b/datafusion/physical-expr-common/src/binary_map.rs @@ -40,8 +40,12 @@ use std::sync::Arc; pub enum OutputType { /// `StringArray` or `LargeStringArray` Utf8, + /// `StringViewArray` + Utf8View, /// `BinaryArray` or `LargeBinaryArray` Binary, + /// `BinaryViewArray` + BinaryView, } /// HashSet optimized for storing string or binary values that can produce that @@ -318,6 +322,7 @@ where observe_payload_fn, ) } + _ => unreachable!("View types should use `ArrowBytesViewMap`"), }; } @@ -516,6 +521,7 @@ where GenericStringArray::new_unchecked(offsets, values, nulls) }) } + _ => unreachable!("View types should use `ArrowBytesViewMap`"), } } diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs new file mode 100644 index 0000000000000..18bc6801aa60f --- /dev/null +++ b/datafusion/physical-expr-common/src/binary_view_map.rs @@ -0,0 +1,690 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ArrowBytesViewMap`] and [`ArrowBytesViewSet`] for storing maps/sets of values from +//! `StringViewArray`/`BinaryViewArray`. +//! Much of the code is from `binary_map.rs`, but with simpler implementation because we directly use the +//! [`GenericByteViewBuilder`]. +use ahash::RandomState; +use arrow::array::cast::AsArray; +use arrow::array::{Array, ArrayBuilder, ArrayRef, GenericByteViewBuilder}; +use arrow::datatypes::{BinaryViewType, ByteViewType, DataType, StringViewType}; +use datafusion_common::hash_utils::create_hashes; +use datafusion_common::utils::proxy::{RawTableAllocExt, VecAllocExt}; +use std::fmt::Debug; +use std::sync::Arc; + +use crate::binary_map::OutputType; + +/// HashSet optimized for storing string or binary values that can produce that +/// the final set as a `GenericBinaryViewArray` with minimal copies. +#[derive(Debug)] +pub struct ArrowBytesViewSet(ArrowBytesViewMap<()>); + +impl ArrowBytesViewSet { + pub fn new(output_type: OutputType) -> Self { + Self(ArrowBytesViewMap::new(output_type)) + } + + /// Inserts each value from `values` into the set + pub fn insert(&mut self, values: &ArrayRef) { + fn make_payload_fn(_value: Option<&[u8]>) {} + fn observe_payload_fn(_payload: ()) {} + self.0 + .insert_if_new(values, make_payload_fn, observe_payload_fn); + } + + /// Return the contents of this map and replace it with a new empty map with + /// the same output type + pub fn take(&mut self) -> Self { + let mut new_self = Self::new(self.0.output_type); + std::mem::swap(self, &mut new_self); + new_self + } + + /// Converts this set into a `StringViewArray` or `BinaryViewArray` + /// containing each distinct value that was interned. + /// This is done without copying the values. + pub fn into_state(self) -> ArrayRef { + self.0.into_state() + } + + /// Returns the total number of distinct values (including nulls) seen so far + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// returns the total number of distinct values (not including nulls) seen so far + pub fn non_null_len(&self) -> usize { + self.0.non_null_len() + } + + /// Return the total size, in bytes, of memory used to store the data in + /// this set, not including `self` + pub fn size(&self) -> usize { + self.0.size() + } +} + +/// Optimized map for storing Arrow "byte view" types (`StringView`, `BinaryView`) +/// values that can produce the set of keys on +/// output as `GenericBinaryViewArray` without copies. +/// +/// Equivalent to `HashSet` but with better performance for arrow +/// data. +/// +/// # Generic Arguments +/// +/// * `V`: payload type +/// +/// # Description +/// +/// This is a specialized HashMap with the following properties: +/// +/// 1. Optimized for storing and emitting Arrow byte types (e.g. +/// `StringViewArray` / `BinaryViewArray`) very efficiently by minimizing copying of +/// the string values themselves, both when inserting and when emitting the +/// final array. +/// +/// 2. Retains the insertion order of entries in the final array. The values are +/// in the same order as they were inserted. 
+/// +/// Note this structure can be used as a `HashSet` by specifying the value type +/// as `()`, as is done by [`ArrowBytesViewSet`]. +/// +/// This map is used by the special `COUNT DISTINCT` aggregate function to +/// store the distinct values, and by the `GROUP BY` operator to store +/// group values when they are a single string array. + +pub struct ArrowBytesViewMap +where + V: Debug + PartialEq + Eq + Clone + Copy + Default, +{ + /// Should the output be StringView or BinaryView? + output_type: OutputType, + /// Underlying hash set for each distinct value + map: hashbrown::raw::RawTable>, + /// Total size of the map in bytes + map_size: usize, + + /// Builder for output array + builder: GenericByteViewBuilder, + /// random state used to generate hashes + random_state: RandomState, + /// buffer that stores hash values (reused across batches to save allocations) + hashes_buffer: Vec, + /// `(payload, null_index)` for the 'null' value, if any + /// NOTE null_index is the logical index in the final array, not the index + /// in the buffer + null: Option<(V, usize)>, +} + +/// The size, in number of entries, of the initial hash table +const INITIAL_MAP_CAPACITY: usize = 512; + +impl ArrowBytesViewMap +where + V: Debug + PartialEq + Eq + Clone + Copy + Default, +{ + pub fn new(output_type: OutputType) -> Self { + Self { + output_type, + map: hashbrown::raw::RawTable::with_capacity(INITIAL_MAP_CAPACITY), + map_size: 0, + builder: GenericByteViewBuilder::new().with_block_size(2 * 1024 * 1024), + random_state: RandomState::new(), + hashes_buffer: vec![], + null: None, + } + } + + /// Return the contents of this map and replace it with a new empty map with + /// the same output type + pub fn take(&mut self) -> Self { + let mut new_self = Self::new(self.output_type); + std::mem::swap(self, &mut new_self); + new_self + } + + /// Inserts each value from `values` into the map, invoking `payload_fn` for + /// each value if *not* already present, deferring the allocation of the + /// payload until it is needed. + /// + /// Note that this is different than a normal map that would replace the + /// existing entry + /// + /// # Arguments: + /// + /// `values`: array whose values are inserted + /// + /// `make_payload_fn`: invoked for each value that is not already present + /// to create the payload, in order of the values in `values` + /// + /// `observe_payload_fn`: invoked once, for each value in `values`, that was + /// already present in the map, with corresponding payload value. + /// + /// # Returns + /// + /// The payload value for the entry, either the existing value or + /// the newly inserted value + /// + /// # Safety: + /// + /// Note that `make_payload_fn` and `observe_payload_fn` are only invoked + /// with valid values from `values`, not for the `NULL` value. 
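// [Editorial sketch, not part of the patch] Before the implementation of `insert_if_new`,
// here is a minimal usage illustration of the set built on top of this map. It uses only
// items added in this file (`ArrowBytesViewSet`, `OutputType::Utf8View`) plus arrow's
// `StringViewArray`, and mirrors the tests at the bottom of the file; it is shown as a
// comment so it does not interfere with the surrounding `impl` block.
//
//     let values: ArrayRef = Arc::new(StringViewArray::from(vec![
//         Some("short"),
//         Some("a string longer than twelve bytes"),
//         Some("short"),
//         None,
//     ]));
//     let mut set = ArrowBytesViewSet::new(OutputType::Utf8View);
//     set.insert(&values);
//     assert_eq!(set.non_null_len(), 2); // duplicates collapse; the null is excluded here
//     assert_eq!(set.len(), 3);          // ...but it does count as one distinct entry
//     let distinct = set.into_state();   // StringViewArray, values in insertion order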
+ pub fn insert_if_new( + &mut self, + values: &ArrayRef, + make_payload_fn: MP, + observe_payload_fn: OP, + ) where + MP: FnMut(Option<&[u8]>) -> V, + OP: FnMut(V), + { + // Sanity check array type + match self.output_type { + OutputType::BinaryView => { + assert!(matches!(values.data_type(), DataType::BinaryView)); + self.insert_if_new_inner::( + values, + make_payload_fn, + observe_payload_fn, + ) + } + OutputType::Utf8View => { + assert!(matches!(values.data_type(), DataType::Utf8View)); + self.insert_if_new_inner::( + values, + make_payload_fn, + observe_payload_fn, + ) + } + _ => unreachable!("Utf8/Binary should use `ArrowBytesSet`"), + }; + } + + /// Generic version of [`Self::insert_if_new`] that handles `ByteViewType` + /// (both StringView and BinaryView) + /// + /// Note this is the only function that is generic on [`ByteViewType`], which + /// avoids having to template the entire structure, making the code + /// simpler and understand and reducing code bloat due to duplication. + /// + /// See comments on `insert_if_new` for more details + fn insert_if_new_inner( + &mut self, + values: &ArrayRef, + mut make_payload_fn: MP, + mut observe_payload_fn: OP, + ) where + MP: FnMut(Option<&[u8]>) -> V, + OP: FnMut(V), + B: ByteViewType, + { + // step 1: compute hashes + let batch_hashes = &mut self.hashes_buffer; + batch_hashes.clear(); + batch_hashes.resize(values.len(), 0); + create_hashes(&[values.clone()], &self.random_state, batch_hashes) + // hash is supported for all types and create_hashes only + // returns errors for unsupported types + .unwrap(); + + // step 2: insert each value into the set, if not already present + let values = values.as_byte_view::(); + + // Ensure lengths are equivalent + assert_eq!(values.len(), batch_hashes.len()); + + for (value, &hash) in values.iter().zip(batch_hashes.iter()) { + // handle null value + let Some(value) = value else { + let payload = if let Some(&(payload, _offset)) = self.null.as_ref() { + payload + } else { + let payload = make_payload_fn(None); + let null_index = self.builder.len(); + self.builder.append_null(); + self.null = Some((payload, null_index)); + payload + }; + observe_payload_fn(payload); + continue; + }; + + // get the value as bytes + let value: &[u8] = value.as_ref(); + + let entry = self.map.get_mut(hash, |header| { + let v = self.builder.get_value(header.view_idx); + + if v.len() != value.len() { + return false; + } + + v == value + }); + + let payload = if let Some(entry) = entry { + entry.payload + } else { + // no existing value, make a new one. + let payload = make_payload_fn(Some(value)); + + let inner_view_idx = self.builder.len(); + let new_header = Entry { + view_idx: inner_view_idx, + hash, + payload, + }; + + self.builder.append_value(value); + + self.map + .insert_accounted(new_header, |h| h.hash, &mut self.map_size); + payload + }; + observe_payload_fn(payload); + } + } + + /// Converts this set into a `StringViewArray`, or `BinaryViewArray`, + /// containing each distinct value + /// that was inserted. This is done without copying the values. + /// + /// The values are guaranteed to be returned in the same order in which + /// they were first seen. + pub fn into_state(self) -> ArrayRef { + let mut builder = self.builder; + match self.output_type { + OutputType::BinaryView => { + let array = builder.finish(); + + Arc::new(array) + } + OutputType::Utf8View => { + // SAFETY: + // we asserted the input arrays were all the correct type and + // thus since all the values that went in were valid (e.g. 
utf8) + // so are all the values that come out + let array = builder.finish(); + let array = unsafe { array.to_string_view_unchecked() }; + Arc::new(array) + } + _ => { + unreachable!("Utf8/Binary should use `ArrowBytesMap`") + } + } + } + + /// Total number of entries (including null, if present) + pub fn len(&self) -> usize { + self.non_null_len() + self.null.map(|_| 1).unwrap_or(0) + } + + /// Is the set empty? + pub fn is_empty(&self) -> bool { + self.map.is_empty() && self.null.is_none() + } + + /// Number of non null entries + pub fn non_null_len(&self) -> usize { + self.map.len() + } + + /// Return the total size, in bytes, of memory used to store the data in + /// this set, not including `self` + pub fn size(&self) -> usize { + self.map_size + + self.builder.allocated_size() + + self.hashes_buffer.allocated_size() + } +} + +impl Debug for ArrowBytesViewMap +where + V: Debug + PartialEq + Eq + Clone + Copy + Default, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ArrowBytesMap") + .field("map", &"") + .field("map_size", &self.map_size) + .field("view_builder", &self.builder) + .field("random_state", &self.random_state) + .field("hashes_buffer", &self.hashes_buffer) + .finish() + } +} + +/// Entry in the hash table -- see [`ArrowBytesViewMap`] for more details +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +struct Entry +where + V: Debug + PartialEq + Eq + Clone + Copy + Default, +{ + /// The idx into the views array + view_idx: usize, + + hash: u64, + + /// value stored by the entry + payload: V, +} + +#[cfg(test)] +mod tests { + use arrow::array::{BinaryViewArray, GenericByteViewArray, StringViewArray}; + use hashbrown::HashMap; + + use super::*; + + // asserts that the set contains the expected strings, in the same order + fn assert_set(set: ArrowBytesViewSet, expected: &[Option<&str>]) { + let strings = set.into_state(); + let strings = strings.as_string_view(); + let state = strings.into_iter().collect::>(); + assert_eq!(state, expected); + } + + #[test] + fn string_view_set_empty() { + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let array: ArrayRef = Arc::new(StringViewArray::new_null(0)); + set.insert(&array); + assert_eq!(set.len(), 0); + assert_eq!(set.non_null_len(), 0); + assert_set(set, &[]); + } + + #[test] + fn string_view_set_one_null() { + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let array: ArrayRef = Arc::new(StringViewArray::new_null(1)); + set.insert(&array); + assert_eq!(set.len(), 1); + assert_eq!(set.non_null_len(), 0); + assert_set(set, &[None]); + } + + #[test] + fn string_view_set_many_null() { + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let array: ArrayRef = Arc::new(StringViewArray::new_null(11)); + set.insert(&array); + assert_eq!(set.len(), 1); + assert_eq!(set.non_null_len(), 0); + assert_set(set, &[None]); + } + + #[test] + fn test_string_view_set_basic() { + // basic test for mixed small and large string values + let values = GenericByteViewArray::from(vec![ + Some("a"), + Some("b"), + Some("CXCCCCCCCCAABB"), // 14 bytes + Some(""), + Some("cbcxx"), // 5 bytes + None, + Some("AAAAAAAA"), // 8 bytes + Some("BBBBBQBBBAAA"), // 12 bytes + Some("a"), + Some("cbcxx"), + Some("b"), + Some("cbcxx"), + Some(""), + None, + Some("BBBBBQBBBAAA"), + Some("BBBBBQBBBAAA"), + Some("AAAAAAAA"), + Some("CXCCCCCCCCAABB"), + ]); + + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let array: ArrayRef = Arc::new(values); + set.insert(&array); + // 
values mut appear be in the order they were inserted + assert_set( + set, + &[ + Some("a"), + Some("b"), + Some("CXCCCCCCCCAABB"), + Some(""), + Some("cbcxx"), + None, + Some("AAAAAAAA"), + Some("BBBBBQBBBAAA"), + ], + ); + } + + #[test] + fn test_string_set_non_utf8() { + // basic test for mixed small and large string values + let values = GenericByteViewArray::from(vec![ + Some("a"), + Some("✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥"), + Some("🔥"), + Some("✨✨✨"), + Some("foobarbaz"), + Some("🔥"), + Some("✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥"), + ]); + + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let array: ArrayRef = Arc::new(values); + set.insert(&array); + // strings mut appear be in the order they were inserted + assert_set( + set, + &[ + Some("a"), + Some("✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥✨🔥"), + Some("🔥"), + Some("✨✨✨"), + Some("foobarbaz"), + ], + ); + } + + // Test use of binary output type + #[test] + fn test_binary_set() { + let v: Vec> = vec![ + Some(b"a"), + Some(b"CXCCCCCCCCCCCCC"), + None, + Some(b"CXCCCCCCCCCCCCC"), + ]; + let values: ArrayRef = Arc::new(BinaryViewArray::from(v)); + + let expected: Vec> = + vec![Some(b"a"), Some(b"CXCCCCCCCCCCCCC"), None]; + let expected: ArrayRef = Arc::new(GenericByteViewArray::from(expected)); + + let mut set = ArrowBytesViewSet::new(OutputType::BinaryView); + set.insert(&values); + assert_eq!(&set.into_state(), &expected); + } + + // inserting strings into the set does not increase reported memory + #[test] + fn test_string_set_memory_usage() { + let strings1 = StringViewArray::from(vec![ + Some("a"), + Some("b"), + Some("CXCCCCCCCCCCC"), // 13 bytes + Some("AAAAAAAA"), // 8 bytes + Some("BBBBBQBBB"), // 9 bytes + ]); + let total_strings1_len = strings1 + .iter() + .map(|s| s.map(|s| s.len()).unwrap_or(0)) + .sum::(); + let values1: ArrayRef = Arc::new(StringViewArray::from(strings1)); + + // Much larger strings in strings2 + let strings2 = StringViewArray::from(vec![ + "FOO".repeat(1000), + "BAR larger than 12 bytes.".repeat(100_000), + "more unique.".repeat(1000), + "more unique2.".repeat(1000), + "FOO".repeat(3000), + ]); + let total_strings2_len = strings2 + .iter() + .map(|s| s.map(|s| s.len()).unwrap_or(0)) + .sum::(); + let values2: ArrayRef = Arc::new(StringViewArray::from(strings2)); + + let mut set = ArrowBytesViewSet::new(OutputType::Utf8View); + let size_empty = set.size(); + + set.insert(&values1); + let size_after_values1 = set.size(); + assert!(size_empty < size_after_values1); + assert!( + size_after_values1 > total_strings1_len, + "expect {size_after_values1} to be more than {total_strings1_len}" + ); + assert!(size_after_values1 < total_strings1_len + total_strings2_len); + + // inserting the same strings should not affect the size + set.insert(&values1); + assert_eq!(set.size(), size_after_values1); + assert_eq!(set.len(), 5); + + // inserting the large strings should increase the reported size + set.insert(&values2); + let size_after_values2 = set.size(); + assert!(size_after_values2 > size_after_values1); + + assert_eq!(set.len(), 10); + } + + #[derive(Debug, PartialEq, Eq, Default, Clone, Copy)] + struct TestPayload { + // store the string value to check against input + index: usize, // store the index of the string (each new string gets the next sequential input) + } + + /// Wraps an [`ArrowBytesViewMap`], validating its invariants + struct TestMap { + map: ArrowBytesViewMap, + // stores distinct strings seen, in order + strings: Vec>, + // map strings to index in strings + indexes: HashMap, usize>, + } + + impl TestMap { + /// creates a map with 
TestPayloads for the given strings and then + /// validates the payloads + fn new() -> Self { + Self { + map: ArrowBytesViewMap::new(OutputType::Utf8View), + strings: vec![], + indexes: HashMap::new(), + } + } + + /// Inserts strings into the map + fn insert(&mut self, strings: &[Option<&str>]) { + let string_array = StringViewArray::from(strings.to_vec()); + let arr: ArrayRef = Arc::new(string_array); + + let mut next_index = self.indexes.len(); + let mut actual_new_strings = vec![]; + let mut actual_seen_indexes = vec![]; + // update self with new values, keeping track of newly added values + for str in strings { + let str = str.map(|s| s.to_string()); + let index = self.indexes.get(&str).cloned().unwrap_or_else(|| { + actual_new_strings.push(str.clone()); + let index = self.strings.len(); + self.strings.push(str.clone()); + self.indexes.insert(str, index); + index + }); + actual_seen_indexes.push(index); + } + + // insert the values into the map, recording what we did + let mut seen_new_strings = vec![]; + let mut seen_indexes = vec![]; + self.map.insert_if_new( + &arr, + |s| { + let value = s + .map(|s| String::from_utf8(s.to_vec()).expect("Non utf8 string")); + let index = next_index; + next_index += 1; + seen_new_strings.push(value); + TestPayload { index } + }, + |payload| { + seen_indexes.push(payload.index); + }, + ); + + assert_eq!(actual_seen_indexes, seen_indexes); + assert_eq!(actual_new_strings, seen_new_strings); + } + + /// Call `self.map.into_array()` validating that the strings are in the same + /// order as they were inserted + fn into_array(self) -> ArrayRef { + let Self { + map, + strings, + indexes: _, + } = self; + + let arr = map.into_state(); + let expected: ArrayRef = Arc::new(StringViewArray::from(strings)); + assert_eq!(&arr, &expected); + arr + } + } + + #[test] + fn test_map() { + let input = vec![ + // Note mix of short/long strings + Some("A"), + Some("bcdefghijklmnop1234567"), + Some("X"), + Some("Y"), + None, + Some("qrstuvqxyzhjwya"), + Some("✨🔥"), + Some("🔥"), + Some("🔥🔥🔥🔥🔥🔥"), + ]; + + let mut test_map = TestMap::new(); + test_map.insert(&input); + test_map.insert(&input); // put it in twice + let expected_output: ArrayRef = Arc::new(StringViewArray::from(input)); + assert_eq!(&test_map.into_array(), &expected_output); + } +} diff --git a/datafusion/physical-expr-common/src/lib.rs b/datafusion/physical-expr-common/src/lib.rs index 8d50e0b964e5b..f03eedd4cf65c 100644 --- a/datafusion/physical-expr-common/src/lib.rs +++ b/datafusion/physical-expr-common/src/lib.rs @@ -17,6 +17,7 @@ pub mod aggregate; pub mod binary_map; +pub mod binary_view_map; pub mod datum; pub mod expressions; pub mod physical_expr; diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs index 9987e97b38d38..f9362db30196f 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/physical-expr/src/aggregate/min_max.rs @@ -45,6 +45,7 @@ use arrow_array::types::{ Decimal128Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_array::{BinaryViewArray, StringViewArray}; use datafusion_common::internal_err; use datafusion_common::ScalarValue; use datafusion_common::{downcast_value, DataFusionError, Result}; @@ -453,6 +454,14 @@ fn min_batch(values: &ArrayRef) -> Result { DataType::LargeUtf8 => { typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string) } + DataType::Utf8View => { + typed_min_max_batch_string!( + 
values, + StringViewArray, + Utf8View, + min_string_view + ) + } DataType::Boolean => { typed_min_max_batch!(values, BooleanArray, Boolean, min_boolean) } @@ -467,6 +476,14 @@ fn min_batch(values: &ArrayRef) -> Result { min_binary ) } + DataType::BinaryView => { + typed_min_max_batch_binary!( + &values, + BinaryViewArray, + BinaryView, + min_binary_view + ) + } _ => min_max_batch!(values, min), }) } @@ -480,12 +497,28 @@ fn max_batch(values: &ArrayRef) -> Result { DataType::LargeUtf8 => { typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string) } + DataType::Utf8View => { + typed_min_max_batch_string!( + values, + StringViewArray, + Utf8View, + max_string_view + ) + } DataType::Boolean => { typed_min_max_batch!(values, BooleanArray, Boolean, max_boolean) } DataType::Binary => { typed_min_max_batch_binary!(&values, BinaryArray, Binary, max_binary) } + DataType::BinaryView => { + typed_min_max_batch_binary!( + &values, + BinaryViewArray, + BinaryView, + max_binary_view + ) + } DataType::LargeBinary => { typed_min_max_batch_binary!( &values, @@ -629,12 +662,18 @@ macro_rules! min_max { (ScalarValue::LargeUtf8(lhs), ScalarValue::LargeUtf8(rhs)) => { typed_min_max_string!(lhs, rhs, LargeUtf8, $OP) } + (ScalarValue::Utf8View(lhs), ScalarValue::Utf8View(rhs)) => { + typed_min_max_string!(lhs, rhs, Utf8View, $OP) + } (ScalarValue::Binary(lhs), ScalarValue::Binary(rhs)) => { typed_min_max_string!(lhs, rhs, Binary, $OP) } (ScalarValue::LargeBinary(lhs), ScalarValue::LargeBinary(rhs)) => { typed_min_max_string!(lhs, rhs, LargeBinary, $OP) } + (ScalarValue::BinaryView(lhs), ScalarValue::BinaryView(rhs)) => { + typed_min_max_string!(lhs, rhs, BinaryView, $OP) + } (ScalarValue::TimestampSecond(lhs, l_tz), ScalarValue::TimestampSecond(rhs, _)) => { typed_min_max!(lhs, rhs, TimestampSecond, $OP, l_tz) } diff --git a/datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs new file mode 100644 index 0000000000000..1a0cb90a16d47 --- /dev/null +++ b/datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
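// [Editor's note, not part of this patch] Stepping back to the `min_batch` /
// `max_batch` hunks above: routing Utf8View/BinaryView through the same macros
// as Utf8/Binary keeps the usual semantics -- nulls are ignored and strings are
// compared byte-lexicographically. A tiny, hypothetical reference (iterator
// based, far slower than the vectorized kernels) for what min/max over a
// `StringViewArray` is expected to return:
//
//     use arrow_array::StringViewArray;
//
//     let a = StringViewArray::from(vec![Some("banana"), None, Some("apple")]);
//     assert_eq!(a.iter().flatten().min(), Some("apple"));  // null skipped
//     assert_eq!(a.iter().flatten().max(), Some("banana"));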
+ +use crate::aggregates::group_values::GroupValues; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use datafusion_expr::EmitTo; +use datafusion_physical_expr::binary_map::OutputType; +use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewMap; + +/// A [`GroupValues`] storing single column of Utf8View/BinaryView values +/// +/// This specialization is significantly faster than using the more general +/// purpose `Row`s format +pub struct GroupValuesBytesView { + /// Map string/binary values to group index + map: ArrowBytesViewMap, + /// The total number of groups so far (used to assign group_index) + num_groups: usize, +} + +impl GroupValuesBytesView { + pub fn new(output_type: OutputType) -> Self { + Self { + map: ArrowBytesViewMap::new(output_type), + num_groups: 0, + } + } +} + +impl GroupValues for GroupValuesBytesView { + fn intern( + &mut self, + cols: &[ArrayRef], + groups: &mut Vec, + ) -> datafusion_common::Result<()> { + assert_eq!(cols.len(), 1); + + // look up / add entries in the table + let arr = &cols[0]; + + groups.clear(); + self.map.insert_if_new( + arr, + // called for each new group + |_value| { + // assign new group index on each insert + let group_idx = self.num_groups; + self.num_groups += 1; + group_idx + }, + // called for each group + |group_idx| { + groups.push(group_idx); + }, + ); + + // ensure we assigned a group to for each row + assert_eq!(groups.len(), arr.len()); + Ok(()) + } + + fn size(&self) -> usize { + self.map.size() + std::mem::size_of::() + } + + fn is_empty(&self) -> bool { + self.num_groups == 0 + } + + fn len(&self) -> usize { + self.num_groups + } + + fn emit(&mut self, emit_to: EmitTo) -> datafusion_common::Result> { + // Reset the map to default, and convert it into a single array + let map_contents = self.map.take().into_state(); + + let group_values = match emit_to { + EmitTo::All => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) if n == self.len() => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) => { + // if we only wanted to take the first n, insert the rest back + // into the map we could potentially avoid this reallocation, at + // the expense of much more complex code. 
+ // see https://github.com/apache/datafusion/issues/9195 + let emit_group_values = map_contents.slice(0, n); + let remaining_group_values = + map_contents.slice(n, map_contents.len() - n); + + self.num_groups = 0; + let mut group_indexes = vec![]; + self.intern(&[remaining_group_values], &mut group_indexes)?; + + // Verify that the group indexes were assigned in the correct order + assert_eq!(0, group_indexes[0]); + + emit_group_values + } + }; + + Ok(vec![group_values]) + } + + fn clear_shrink(&mut self, _batch: &RecordBatch) { + // in theory we could potentially avoid this reallocation and clear the + // contents of the maps, but for now we just reset the map from the beginning + self.map.take(); + } +} diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index b5bc923b467d2..be7ac934d7bcc 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -18,6 +18,7 @@ use arrow::record_batch::RecordBatch; use arrow_array::{downcast_primitive, ArrayRef}; use arrow_schema::{DataType, SchemaRef}; +use bytes_view::GroupValuesBytesView; use datafusion_common::Result; pub(crate) mod primitive; @@ -28,6 +29,7 @@ mod row; use row::GroupValuesRows; mod bytes; +mod bytes_view; use bytes::GroupValuesByes; use datafusion_physical_expr::binary_map::OutputType; @@ -67,17 +69,26 @@ pub fn new_group_values(schema: SchemaRef) -> Result> { _ => {} } - if let DataType::Utf8 = d { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); - } - if let DataType::LargeUtf8 = d { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); - } - if let DataType::Binary = d { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); - } - if let DataType::LargeBinary = d { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); + match d { + DataType::Utf8 => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); + } + DataType::LargeUtf8 => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); + } + DataType::Utf8View => { + return Ok(Box::new(GroupValuesBytesView::new(OutputType::Utf8View))); + } + DataType::Binary => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); + } + DataType::LargeBinary => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); + } + DataType::BinaryView => { + return Ok(Box::new(GroupValuesBytesView::new(OutputType::BinaryView))); + } + _ => {} } } diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 061c849971b2c..038727daa7d87 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -23,19 +23,22 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; -use crate::{ - DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, -}; - +use arrow::array::{AsArray, StringViewBuilder}; use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; +use arrow_array::{Array, ArrayRef}; +use futures::stream::{Stream, StreamExt}; + use datafusion_common::Result; use datafusion_execution::TaskContext; -use futures::stream::{Stream, StreamExt}; +use crate::{ + DisplayFormatType, 
ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, +}; + +use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; /// `CoalesceBatchesExec` combines small batches into larger batches for more /// efficient use of vectorized processing by later operators. The operator @@ -249,6 +252,8 @@ impl CoalesceBatchesStream { match input_batch { Poll::Ready(x) => match x { Some(Ok(batch)) => { + let batch = gc_string_view_batch(&batch); + // Handle fetch limit: if let Some(fetch) = self.fetch { if self.total_rows + batch.num_rows() >= fetch { @@ -324,13 +329,84 @@ impl RecordBatchStream for CoalesceBatchesStream { } } +/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed +/// +/// This function decides when to consolidate the StringView into a new buffer +/// to reduce memory usage and improve string locality for better performance. +/// +/// This differs from `StringViewArray::gc` because: +/// 1. It may not compact the array depending on a heuristic. +/// 2. It uses a precise block size to reduce the number of buffers to track. +/// +/// # Heuristic +/// +/// If the average size of each view is larger than 32 bytes, we compact the array. +/// +/// `StringViewArray` include pointers to buffer that hold the underlying data. +/// One of the great benefits of `StringViewArray` is that many operations +/// (e.g., `filter`) can be done without copying the underlying data. +/// +/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the +/// `StringViewArray` may only refer to a small portion of the buffer, +/// significantly increasing memory usage. +fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { + let new_columns: Vec = batch + .columns() + .iter() + .map(|c| { + // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. + let Some(s) = c.as_string_view_opt() else { + return Arc::clone(c); + }; + let ideal_buffer_size: usize = s + .views() + .iter() + .map(|v| { + let len = (*v as u32) as usize; + if len > 12 { + len + } else { + 0 + } + }) + .sum(); + let actual_buffer_size = s.get_buffer_memory_size(); + + // Re-creating the array copies data and can be time consuming. + // We only do it if the array is sparse + if actual_buffer_size > (ideal_buffer_size * 2) { + // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. + // See https://github.com/apache/arrow-rs/issues/6094 for more details. 
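// [Editor's worked example, not part of this patch] To make the heuristic above
// concrete with made-up numbers: suppose a selective filter kept 1_000 views of
// a 20-byte string column, all still pointing into the original ~2 MB of data
// buffers. Only views longer than 12 bytes contribute to `ideal_buffer_size`
// (shorter values are inlined in the view itself), so
//
//     ideal_buffer_size  = 1_000 * 20 bytes = 20_000 bytes
//     actual_buffer_size = ~2_000_000 bytes (the untouched original buffers)
//
// Since 2_000_000 > 2 * 20_000, this branch rebuilds the column into a single
// tight buffer; an unfiltered batch would have comparable sizes and be passed
// through unchanged.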
+ let mut builder = StringViewBuilder::with_capacity(s.len()) + .with_block_size(ideal_buffer_size as u32); + + for v in s.iter() { + builder.append_option(v); + } + + let gc_string = builder.finish(); + + debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 + + Arc::new(gc_string) + } else { + Arc::clone(c) + } + }) + .collect(); + RecordBatch::try_new(batch.schema(), new_columns) + .expect("Failed to re-create the gc'ed record batch") +} + #[cfg(test)] mod tests { - use super::*; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::builder::ArrayBuilder; + use arrow_array::{StringViewArray, UInt32Array}; + use crate::{memory::MemoryExec, repartition::RepartitionExec, Partitioning}; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_array::UInt32Array; + use super::*; #[tokio::test(flavor = "multi_thread")] async fn test_concat_batches() -> Result<()> { @@ -485,4 +561,99 @@ mod tests { ) .unwrap() } + + #[test] + fn test_gc_string_view_batch_small_no_compact() { + // view with only short strings (no buffers) --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("a"), Some("b"), Some("c")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 0); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_no_compact() { + // view with large strings (has buffers) but full --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("This string is longer than 12 bytes")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_slice_compact() { + // view with large strings (has buffers) and only partially used --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("this string is longer than 12 bytes")], + } + .build(); + + // slice only 11 rows, so most of the buffer is not used + let array = array.slice(11, 22); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer + } + + /// Compares the values of two string view arrays + fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { + assert_eq!(arr1.len(), arr2.len()); + for (s1, s2) in arr1.iter().zip(arr2.iter()) { + assert_eq!(s1, s2); + } + } + + /// runs garbage collection on string view array + /// and ensures the number of rows are the same + fn do_gc(array: StringViewArray) -> StringViewArray { + let batch = + RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); + let gc_batch = gc_string_view_batch(&batch); + assert_eq!(batch.num_rows(), gc_batch.num_rows()); + assert_eq!(batch.schema(), gc_batch.schema()); + gc_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .clone() + } + + /// Describes parameters for creating a `StringViewArray` + struct StringViewTest { + /// The number of rows in the array + rows: usize, + /// The strings to use in the array (repeated over and over + strings: Vec>, + } + + impl StringViewTest { 
+ /// Create a `StringViewArray` with the parameters specified in this struct + fn build(self) -> StringViewArray { + let mut builder = StringViewBuilder::with_capacity(100); + loop { + for &v in self.strings.iter() { + builder.append_option(v); + if builder.len() >= self.rows { + return builder.finish(); + } + } + } + } + } } diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 752f2cf768733..c59aaa2d42bbe 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -491,6 +491,7 @@ message ParquetOptions { uint64 maximum_buffered_record_batches_per_stream = 25; // default = 2 bool bloom_filter_on_read = 26; // default = true bool bloom_filter_on_write = 27; // default = false + bool schema_force_string_view = 28; // default = false oneof metadata_size_hint_opt { uint64 metadata_size_hint = 4; diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 21db66a127018..45703d8b9fed4 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -956,7 +956,7 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { allow_single_file_parallelism: value.allow_single_file_parallelism, maximum_parallel_row_group_writers: value.maximum_parallel_row_group_writers as usize, maximum_buffered_record_batches_per_stream: value.maximum_buffered_record_batches_per_stream as usize, - + schema_force_string_view: value.schema_force_string_view, }) } } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 4ac6517ed739a..23dd5746929d8 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -4871,6 +4871,9 @@ impl serde::Serialize for ParquetOptions { if self.bloom_filter_on_write { len += 1; } + if self.schema_force_string_view { + len += 1; + } if self.dictionary_page_size_limit != 0 { len += 1; } @@ -4954,6 +4957,9 @@ impl serde::Serialize for ParquetOptions { if self.bloom_filter_on_write { struct_ser.serialize_field("bloomFilterOnWrite", &self.bloom_filter_on_write)?; } + if self.schema_force_string_view { + struct_ser.serialize_field("schemaForceStringView", &self.schema_force_string_view)?; + } if self.dictionary_page_size_limit != 0 { #[allow(clippy::needless_borrow)] struct_ser.serialize_field("dictionaryPageSizeLimit", ToString::to_string(&self.dictionary_page_size_limit).as_str())?; @@ -5071,6 +5077,8 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { "bloomFilterOnRead", "bloom_filter_on_write", "bloomFilterOnWrite", + "schema_force_string_view", + "schemaForceStringView", "dictionary_page_size_limit", "dictionaryPageSizeLimit", "data_page_row_count_limit", @@ -5112,6 +5120,7 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { MaximumBufferedRecordBatchesPerStream, BloomFilterOnRead, BloomFilterOnWrite, + SchemaForceStringView, DictionaryPageSizeLimit, DataPageRowCountLimit, MaxRowGroupSize, @@ -5159,6 +5168,7 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { "maximumBufferedRecordBatchesPerStream" | "maximum_buffered_record_batches_per_stream" => Ok(GeneratedField::MaximumBufferedRecordBatchesPerStream), "bloomFilterOnRead" | "bloom_filter_on_read" => Ok(GeneratedField::BloomFilterOnRead), "bloomFilterOnWrite" | "bloom_filter_on_write" => Ok(GeneratedField::BloomFilterOnWrite), + "schemaForceStringView" 
| "schema_force_string_view" => Ok(GeneratedField::SchemaForceStringView), "dictionaryPageSizeLimit" | "dictionary_page_size_limit" => Ok(GeneratedField::DictionaryPageSizeLimit), "dataPageRowCountLimit" | "data_page_row_count_limit" => Ok(GeneratedField::DataPageRowCountLimit), "maxRowGroupSize" | "max_row_group_size" => Ok(GeneratedField::MaxRowGroupSize), @@ -5204,6 +5214,7 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { let mut maximum_buffered_record_batches_per_stream__ = None; let mut bloom_filter_on_read__ = None; let mut bloom_filter_on_write__ = None; + let mut schema_force_string_view__ = None; let mut dictionary_page_size_limit__ = None; let mut data_page_row_count_limit__ = None; let mut max_row_group_size__ = None; @@ -5305,6 +5316,12 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { } bloom_filter_on_write__ = Some(map_.next_value()?); } + GeneratedField::SchemaForceStringView => { + if schema_force_string_view__.is_some() { + return Err(serde::de::Error::duplicate_field("schemaForceStringView")); + } + schema_force_string_view__ = Some(map_.next_value()?); + } GeneratedField::DictionaryPageSizeLimit => { if dictionary_page_size_limit__.is_some() { return Err(serde::de::Error::duplicate_field("dictionaryPageSizeLimit")); @@ -5405,6 +5422,7 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { maximum_buffered_record_batches_per_stream: maximum_buffered_record_batches_per_stream__.unwrap_or_default(), bloom_filter_on_read: bloom_filter_on_read__.unwrap_or_default(), bloom_filter_on_write: bloom_filter_on_write__.unwrap_or_default(), + schema_force_string_view: schema_force_string_view__.unwrap_or_default(), dictionary_page_size_limit: dictionary_page_size_limit__.unwrap_or_default(), data_page_row_count_limit: data_page_row_count_limit__.unwrap_or_default(), max_row_group_size: max_row_group_size__.unwrap_or_default(), diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index bf198a24c811b..9bea9be89e1d3 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -805,6 +805,9 @@ pub struct ParquetOptions { /// default = false #[prost(bool, tag = "27")] pub bloom_filter_on_write: bool, + /// default = false + #[prost(bool, tag = "28")] + pub schema_force_string_view: bool, #[prost(uint64, tag = "12")] pub dictionary_page_size_limit: u64, #[prost(uint64, tag = "18")] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 24083e8b72768..a61a026089fc6 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -827,6 +827,7 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { allow_single_file_parallelism: value.allow_single_file_parallelism, maximum_parallel_row_group_writers: value.maximum_parallel_row_group_writers as u64, maximum_buffered_record_batches_per_stream: value.maximum_buffered_record_batches_per_stream as u64, + schema_force_string_view: value.schema_force_string_view, }) } } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index b36624e391c2b..f48b05e8d3dc3 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -809,6 +809,9 @@ pub struct ParquetOptions { /// default = false #[prost(bool, tag = "27")] pub bloom_filter_on_write: bool, + /// default = false 
+ #[prost(bool, tag = "28")] + pub schema_force_string_view: bool, #[prost(uint64, tag = "12")] pub dictionary_page_size_limit: u64, #[prost(uint64, tag = "18")] diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 520b6b53b32da..66ffeadf8cec8 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -233,6 +233,11 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { DataType::Utf8 => { Ok(varchar_to_str(get_row_value!(array::StringArray, col, row))) } + DataType::Utf8View => Ok(varchar_to_str(get_row_value!( + array::StringViewArray, + col, + row + ))), _ => { let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); Ok(f.unwrap().value(row).to_string()) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index e85159fd137a7..fef7bfe821744 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -201,6 +201,7 @@ datafusion.execution.parquet.metadata_size_hint NULL datafusion.execution.parquet.pruning true datafusion.execution.parquet.pushdown_filters false datafusion.execution.parquet.reorder_filters false +datafusion.execution.parquet.schema_force_string_view false datafusion.execution.parquet.skip_metadata true datafusion.execution.parquet.statistics_enabled page datafusion.execution.parquet.write_batch_size 1024 @@ -287,6 +288,7 @@ datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the datafusion.execution.parquet.pruning true (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query +datafusion.execution.parquet.schema_force_string_view false (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3ba4e271c2f64..3f9a4793f655d 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -324,3 +324,24 @@ logical_plan statement ok drop table test; + +# coercion from stringview to integer, as input to make_date +query D +select make_date(arrow_cast('2024', 'Utf8View'), arrow_cast('01', 'Utf8View'), arrow_cast('23', 'Utf8View')) +---- +2024-01-23 + +# coercions between stringview and date types +statement ok +create table dates (dt date) as values + (date '2024-01-23'), + (date '2023-11-30'); + +query D +select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; +---- +2024-01-23 + + +statement ok +drop table dates; diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 5814d88c7dd87..78d0d7b0239ff 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -76,6 +76,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.allow_single_file_parallelism | true | (writing) Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns. | | datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | +| datafusion.execution.parquet.schema_force_string_view | false | (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. | | datafusion.execution.aggregate.scalar_update_factor | 10 | Specifies the threshold for using `ScalarValue`s to update accumulators during high-cardinality aggregations for each input batch. The aggregation is considered high-cardinality if the number of affected groups is greater than or equal to `batch_size / scalar_update_factor`. In such cases, `ScalarValue`s are utilized for updating accumulators, rather than the default batch-slice approach. This can lead to performance improvements. By adjusting the `scalar_update_factor`, you can balance the trade-off between more efficient accumulator updates and the number of groups affected. 
| | datafusion.execution.planning_concurrency | 0 | Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system | | datafusion.execution.sort_spill_reservation_bytes | 10485760 | Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). | From 2f5e73c6aa82a3c45ff348ce0d1ea4eec4fc2a0d Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Tue, 30 Jul 2024 11:06:11 +0800 Subject: [PATCH 169/357] Fix bug in `remove_join_expressions` (#11693) * Fix bug in `remove_join_expressions` * Update datafusion/optimizer/src/eliminate_cross_join.rs Co-authored-by: Andrew Lamb * fmt --------- Co-authored-by: Andrew Lamb --- .../optimizer/src/eliminate_cross_join.rs | 34 +++++++++++++++---- datafusion/sqllogictest/test_files/join.slt | 15 +++++++- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 729c45426ff29..fc4eaef809033 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -386,9 +386,7 @@ fn remove_join_expressions(expr: Expr, join_keys: &JoinKeySet) -> Option { None } // Fix for issue#78 join predicates from inside of OR expr also pulled up properly. - Expr::BinaryExpr(BinaryExpr { left, op, right }) - if matches!(op, Operator::And | Operator::Or) => - { + Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == Operator::And => { let l = remove_join_expressions(*left, join_keys); let r = remove_join_expressions(*right, join_keys); match (l, r) { @@ -402,7 +400,20 @@ fn remove_join_expressions(expr: Expr, join_keys: &JoinKeySet) -> Option { _ => None, } } - + Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == Operator::Or => { + let l = remove_join_expressions(*left, join_keys); + let r = remove_join_expressions(*right, join_keys); + match (l, r) { + (Some(ll), Some(rr)) => Some(Expr::BinaryExpr(BinaryExpr::new( + Box::new(ll), + op, + Box::new(rr), + ))), + // When either `left` or `right` is empty, it means they are `true` + // so OR'ing anything with them will also be true + _ => None, + } + } _ => Some(expr), } } @@ -995,6 +1006,7 @@ mod tests { let t4 = test_table_scan_with_name("t4")?; // could eliminate to inner join + // filter: (t1.a = t2.a OR t2.c < 15) AND (t1.a = t2.a AND tc.2 = 688) let plan1 = LogicalPlanBuilder::from(t1) .cross_join(t2)? .filter(binary_expr( @@ -1012,6 +1024,10 @@ mod tests { let plan2 = LogicalPlanBuilder::from(t3).cross_join(t4)?.build()?; // could eliminate to inner join + // filter: + // ((t3.a = t1.a AND t4.c < 15) OR (t3.a = t1.a AND t4.c = 688)) + // AND + // ((t3.a = t4.a AND t4.c < 15) OR (t3.a = t4.a AND t3.c = 688) OR (t3.a = t4.a AND t3.b = t4.b)) let plan = LogicalPlanBuilder::from(plan1) .cross_join(plan2)? 
.filter(binary_expr( @@ -1057,7 +1073,7 @@ mod tests { "Filter: (t4.c < UInt32(15) OR t4.c = UInt32(688)) AND (t4.c < UInt32(15) OR t3.c = UInt32(688) OR t3.b = t4.b) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t3.a = t4.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t1.a = t3.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", - " Filter: t2.c < UInt32(15) AND t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + " Filter: t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t1.a = t2.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " TableScan: t1 [a:UInt32, b:UInt32, c:UInt32]", " TableScan: t2 [a:UInt32, b:UInt32, c:UInt32]", @@ -1084,6 +1100,12 @@ mod tests { let plan2 = LogicalPlanBuilder::from(t3).cross_join(t4)?.build()?; // could eliminate to inner join + // Filter: + // ((t3.a = t1.a AND t4.c < 15) OR (t3.a = t1.a AND t4.c = 688)) + // AND + // ((t3.a = t4.a AND t4.c < 15) OR (t3.a = t4.a AND t3.c = 688) OR (t3.a = t4.a AND t3.b = t4.b)) + // AND + // ((t1.a = t2.a OR t2.c < 15) AND (t1.a = t2.a AND t2.c = 688)) let plan = LogicalPlanBuilder::from(plan1) .cross_join(plan2)? .filter(binary_expr( @@ -1142,7 +1164,7 @@ mod tests { .build()?; let expected = vec![ - "Filter: (t4.c < UInt32(15) OR t4.c = UInt32(688)) AND (t4.c < UInt32(15) OR t3.c = UInt32(688) OR t3.b = t4.b) AND t2.c < UInt32(15) AND t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", + "Filter: (t4.c < UInt32(15) OR t4.c = UInt32(688)) AND (t4.c < UInt32(15) OR t3.c = UInt32(688) OR t3.b = t4.b) AND t2.c = UInt32(688) [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t3.a = t4.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t1.a = t3.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", " Inner Join: t1.a = t2.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32, b:UInt32, c:UInt32]", diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 4f01d2b2c72bf..84aeb3ebd7661 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1023,7 +1023,6 @@ statement ok DROP TABLE t3; -# Test issue: https://github.com/apache/datafusion/issues/11275 statement ok CREATE TABLE t0 (v1 BOOLEAN) AS VALUES (false), (null); @@ -1033,6 +1032,7 @@ CREATE TABLE t1 (v1 BOOLEAN) AS VALUES (false), (null), (false); statement ok CREATE TABLE t2 (v1 BOOLEAN) AS VALUES (false), (true); +# Test issue: https://github.com/apache/datafusion/issues/11275 query BB SELECT t2.v1, t1.v1 FROM t0, t1, t2 WHERE t2.v1 IS DISTINCT FROM t0.v1 ORDER BY 1,2; ---- @@ -1046,6 +1046,19 @@ true false true NULL true NULL +# Test issue: https://github.com/apache/datafusion/issues/11621 +query BB +SELECT * FROM t1 JOIN t2 ON t1.v1 = t2.v1 WHERE (t1.v1 == t2.v1) OR t1.v1; +---- +false false +false false + +query BB +SELECT * FROM t1 JOIN t2 ON t1.v1 = t2.v1 WHERE t1.v1 OR (t1.v1 == t2.v1); +---- +false false +false false + statement ok DROP 
TABLE t0; From 35c2e7e7eb04e80877bbbc1fa4a5b06f31a4e4bc Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Tue, 30 Jul 2024 08:31:29 -0400 Subject: [PATCH 170/357] =?UTF-8?q?Initial=20changes=20to=20support=20usin?= =?UTF-8?q?g=20udaf=20min/max=20for=20statistics=20and=20opti=E2=80=A6=20(?= =?UTF-8?q?#11696)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial changes to support using udaf min/max for statistics and optimizations * Listening to Clippy on CI * Implementing feedback from PR --- datafusion/expr/src/udaf.rs | 18 ++++++++++++++++++ .../physical-expr-common/src/aggregate/mod.rs | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 3f4a99749cf65..af964b615445f 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -249,6 +249,14 @@ impl AggregateUDF { pub fn simplify(&self) -> Option { self.inner.simplify() } + + /// Returns true if the function is max, false if the function is min + /// None in all other cases, used in certain optimizations or + /// or aggregate + /// + pub fn is_descending(&self) -> Option { + self.inner.is_descending() + } } impl From for AggregateUDF @@ -536,6 +544,16 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { self.signature().hash(hasher); hasher.finish() } + + /// If this function is max, return true + /// if the function is min, return false + /// otherwise return None (the default) + /// + /// + /// Note: this is used to use special aggregate implementations in certain conditions + fn is_descending(&self) -> Option { + None + } } pub enum ReversedUDAF { diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 4eede6567504a..aa7273648c636 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -730,6 +730,12 @@ impl AggregateExpr for AggregateFunctionExpr { } } } + + fn get_minmax_desc(&self) -> Option<(Field, bool)> { + self.fun + .is_descending() + .and_then(|flag| self.field().ok().map(|f| (f, flag))) + } } impl PartialEq for AggregateFunctionExpr { From 66a85706f6c5dc5eabcc09b0990d84c6f8879b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 31 Jul 2024 01:56:30 +0800 Subject: [PATCH 171/357] Rename `input_type` --> `input_types` on AggregateFunctionExpr / AccumulatorArgs / StateFieldsArgs (#11666) * UDAF input types * Rename * Update COMMENTS.md * Update datafusion/functions-aggregate/COMMENTS.md --------- Co-authored-by: Andrew Lamb --- .../tests/user_defined/user_defined_plan.rs | 8 ++-- datafusion/expr/src/function.rs | 8 ++-- datafusion/functions-aggregate/COMMENTS.md | 2 +- .../src/approx_distinct.rs | 2 +- .../functions-aggregate/src/approx_median.rs | 2 +- .../src/approx_percentile_cont.rs | 2 +- .../functions-aggregate/src/array_agg.rs | 12 +++--- datafusion/functions-aggregate/src/average.rs | 16 ++++---- datafusion/functions-aggregate/src/count.rs | 4 +- .../functions-aggregate/src/first_last.rs | 4 +- datafusion/functions-aggregate/src/median.rs | 4 +- .../functions-aggregate/src/nth_value.rs | 4 +- datafusion/functions-aggregate/src/stddev.rs | 4 +- .../physical-expr-common/src/aggregate/mod.rs | 38 ++++++++++--------- 14 files changed, 57 insertions(+), 53 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 
a44f522ba95ac..47804b927e641 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -68,6 +68,10 @@ use arrow::{ record_batch::RecordBatch, util::pretty::pretty_format_batches, }; +use async_trait::async_trait; +use futures::{Stream, StreamExt}; + +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::{ common::cast::{as_int64_array, as_string_array}, common::{arrow_datafusion_err, internal_err, DFSchemaRef}, @@ -90,16 +94,12 @@ use datafusion::{ physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}, prelude::{SessionConfig, SessionContext}, }; - -use async_trait::async_trait; -use datafusion::execution::session_state::SessionStateBuilder; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::ScalarValue; use datafusion_expr::Projection; use datafusion_optimizer::optimizer::ApplyOrder; use datafusion_optimizer::AnalyzerRule; -use futures::{Stream, StreamExt}; /// Execute the specified sql and return the resulting record batches /// pretty printed as a String. diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index d722e55de487c..d8be2b4347323 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -94,8 +94,8 @@ pub struct AccumulatorArgs<'a> { /// ``` pub is_distinct: bool, - /// The input type of the aggregate function. - pub input_type: &'a DataType, + /// The input types of the aggregate function. + pub input_types: &'a [DataType], /// The logical expression of arguments the aggregate function takes. pub input_exprs: &'a [Expr], @@ -109,8 +109,8 @@ pub struct StateFieldsArgs<'a> { /// The name of the aggregate function. pub name: &'a str, - /// The input type of the aggregate function. - pub input_type: &'a DataType, + /// The input types of the aggregate function. + pub input_types: &'a [DataType], /// The return type of the aggregate function. 
pub return_type: &'a DataType, diff --git a/datafusion/functions-aggregate/COMMENTS.md b/datafusion/functions-aggregate/COMMENTS.md index 23a996faf0075..e669e13557115 100644 --- a/datafusion/functions-aggregate/COMMENTS.md +++ b/datafusion/functions-aggregate/COMMENTS.md @@ -54,7 +54,7 @@ first argument and the definition looks like this: // `input_type` : data type of the first argument let mut fields = vec![Field::new_list( format_state_name(self.name(), "nth_value"), - Field::new("item", args.input_type.clone(), true /* nullable of list item */ ), + Field::new("item", args.input_types[0].clone(), true /* nullable of list item */ ), false, // nullable of list itself )]; ``` diff --git a/datafusion/functions-aggregate/src/approx_distinct.rs b/datafusion/functions-aggregate/src/approx_distinct.rs index 7c6aef9944f69..56ef32e7ebe07 100644 --- a/datafusion/functions-aggregate/src/approx_distinct.rs +++ b/datafusion/functions-aggregate/src/approx_distinct.rs @@ -277,7 +277,7 @@ impl AggregateUDFImpl for ApproxDistinct { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let accumulator: Box = match acc_args.input_type { + let accumulator: Box = match &acc_args.input_types[0] { // TODO u8, i8, u16, i16 shall really be done using bitmap, not HLL // TODO support for boolean (trivial case) // https://github.com/apache/datafusion/issues/1109 diff --git a/datafusion/functions-aggregate/src/approx_median.rs b/datafusion/functions-aggregate/src/approx_median.rs index bc723c8629539..e12e3445a83ed 100644 --- a/datafusion/functions-aggregate/src/approx_median.rs +++ b/datafusion/functions-aggregate/src/approx_median.rs @@ -113,7 +113,7 @@ impl AggregateUDFImpl for ApproxMedian { Ok(Box::new(ApproxPercentileAccumulator::new( 0.5_f64, - acc_args.input_type.clone(), + acc_args.input_types[0].clone(), ))) } } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index dfb94a84cbecc..16837dc80748c 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -104,7 +104,7 @@ impl ApproxPercentileCont { None }; - let accumulator: ApproxPercentileAccumulator = match args.input_type { + let accumulator: ApproxPercentileAccumulator = match &args.input_types[0] { t @ (DataType::UInt8 | DataType::UInt16 | DataType::UInt32 diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index c25d592428bbd..36c9d6a0d7c81 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -90,7 +90,7 @@ impl AggregateUDFImpl for ArrayAgg { return Ok(vec![Field::new_list( format_state_name(args.name, "distinct_array_agg"), // See COMMENTS.md to understand why nullable is set to true - Field::new("item", args.input_type.clone(), true), + Field::new("item", args.input_types[0].clone(), true), true, )]); } @@ -98,7 +98,7 @@ impl AggregateUDFImpl for ArrayAgg { let mut fields = vec![Field::new_list( format_state_name(args.name, "array_agg"), // See COMMENTS.md to understand why nullable is set to true - Field::new("item", args.input_type.clone(), true), + Field::new("item", args.input_types[0].clone(), true), true, )]; @@ -119,12 +119,14 @@ impl AggregateUDFImpl for ArrayAgg { fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { if acc_args.is_distinct { return Ok(Box::new(DistinctArrayAggAccumulator::try_new( - acc_args.input_type, 
+ &acc_args.input_types[0], )?)); } if acc_args.sort_exprs.is_empty() { - return Ok(Box::new(ArrayAggAccumulator::try_new(acc_args.input_type)?)); + return Ok(Box::new(ArrayAggAccumulator::try_new( + &acc_args.input_types[0], + )?)); } let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( @@ -138,7 +140,7 @@ impl AggregateUDFImpl for ArrayAgg { .collect::>>()?; OrderSensitiveArrayAggAccumulator::try_new( - acc_args.input_type, + &acc_args.input_types[0], &ordering_dtypes, ordering_req, acc_args.is_reversed, diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index 18642fb843293..228bce1979a38 100644 --- a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -93,7 +93,7 @@ impl AggregateUDFImpl for Avg { } use DataType::*; // instantiate specialized accumulator based for the type - match (acc_args.input_type, acc_args.data_type) { + match (&acc_args.input_types[0], acc_args.data_type) { (Float64, Float64) => Ok(Box::::default()), ( Decimal128(sum_precision, sum_scale), @@ -120,7 +120,7 @@ impl AggregateUDFImpl for Avg { })), _ => exec_err!( "AvgAccumulator for ({} --> {})", - acc_args.input_type, + &acc_args.input_types[0], acc_args.data_type ), } @@ -135,7 +135,7 @@ impl AggregateUDFImpl for Avg { ), Field::new( format_state_name(args.name, "sum"), - args.input_type.clone(), + args.input_types[0].clone(), true, ), ]) @@ -154,10 +154,10 @@ impl AggregateUDFImpl for Avg { ) -> Result> { use DataType::*; // instantiate specialized accumulator based for the type - match (args.input_type, args.data_type) { + match (&args.input_types[0], args.data_type) { (Float64, Float64) => { Ok(Box::new(AvgGroupsAccumulator::::new( - args.input_type, + &args.input_types[0], args.data_type, |sum: f64, count: u64| Ok(sum / count as f64), ))) @@ -176,7 +176,7 @@ impl AggregateUDFImpl for Avg { move |sum: i128, count: u64| decimal_averager.avg(sum, count as i128); Ok(Box::new(AvgGroupsAccumulator::::new( - args.input_type, + &args.input_types[0], args.data_type, avg_fn, ))) @@ -197,7 +197,7 @@ impl AggregateUDFImpl for Avg { }; Ok(Box::new(AvgGroupsAccumulator::::new( - args.input_type, + &args.input_types[0], args.data_type, avg_fn, ))) @@ -205,7 +205,7 @@ impl AggregateUDFImpl for Avg { _ => not_impl_err!( "AvgGroupsAccumulator for ({} --> {})", - args.input_type, + &args.input_types[0], args.data_type ), } diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 69eac84f890d3..e2d59003fca14 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -127,7 +127,7 @@ impl AggregateUDFImpl for Count { Ok(vec![Field::new_list( format_state_name(args.name, "count distinct"), // See COMMENTS.md to understand why nullable is set to true - Field::new("item", args.input_type.clone(), true), + Field::new("item", args.input_types[0].clone(), true), false, )]) } else { @@ -148,7 +148,7 @@ impl AggregateUDFImpl for Count { return not_impl_err!("COUNT DISTINCT with multiple arguments"); } - let data_type = acc_args.input_type; + let data_type = &acc_args.input_types[0]; Ok(match data_type { // try and use a specialized accumulator if possible, otherwise fall back to generic accumulator DataType::Int8 => Box::new( diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index 8969937d377c4..587767b8e356a 100644 --- 
a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -440,14 +440,14 @@ impl AggregateUDFImpl for LastValue { fn state_fields(&self, args: StateFieldsArgs) -> Result> { let StateFieldsArgs { name, - input_type, + input_types, return_type: _, ordering_fields, is_distinct: _, } = args; let mut fields = vec![Field::new( format_state_name(name, "last_value"), - input_type.clone(), + input_types[0].clone(), true, )]; fields.extend(ordering_fields.to_vec()); diff --git a/datafusion/functions-aggregate/src/median.rs b/datafusion/functions-aggregate/src/median.rs index bb926b8da2712..febf1fcd2fefb 100644 --- a/datafusion/functions-aggregate/src/median.rs +++ b/datafusion/functions-aggregate/src/median.rs @@ -102,7 +102,7 @@ impl AggregateUDFImpl for Median { fn state_fields(&self, args: StateFieldsArgs) -> Result> { //Intermediate state is a list of the elements we have collected so far - let field = Field::new("item", args.input_type.clone(), true); + let field = Field::new("item", args.input_types[0].clone(), true); let state_name = if args.is_distinct { "distinct_median" } else { @@ -133,7 +133,7 @@ impl AggregateUDFImpl for Median { }; } - let dt = acc_args.input_type; + let dt = &acc_args.input_types[0]; downcast_integer! { dt => (helper, dt), DataType::Float16 => helper!(Float16Type, dt), diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 74f77f3f4b86a..dc7c6c86f2130 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -114,7 +114,7 @@ impl AggregateUDFImpl for NthValueAgg { NthValueAccumulator::try_new( n, - acc_args.input_type, + &acc_args.input_types[0], &ordering_dtypes, ordering_req, ) @@ -125,7 +125,7 @@ impl AggregateUDFImpl for NthValueAgg { let mut fields = vec![Field::new_list( format_state_name(self.name(), "nth_value"), // See COMMENTS.md to understand why nullable is set to true - Field::new("item", args.input_type.clone(), true), + Field::new("item", args.input_types[0].clone(), true), false, )]; let orderings = args.ordering_fields.to_vec(); diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index 247962dc2ce11..df757ddc04226 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -335,7 +335,7 @@ mod tests { name: "a", is_distinct: false, is_reversed: false, - input_type: &DataType::Float64, + input_types: &[DataType::Float64], input_exprs: &[datafusion_expr::col("a")], }; @@ -348,7 +348,7 @@ mod tests { name: "a", is_distinct: false, is_reversed: false, - input_type: &DataType::Float64, + input_types: &[DataType::Float64], input_exprs: &[datafusion_expr::col("a")], }; diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index aa7273648c636..665cdd708329f 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -15,31 +15,33 @@ // specific language governing permissions and limitations // under the License. 
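// A minimal sketch (not part of the patch) of what the `input_type` -> `input_types`
// change above means for a single-argument aggregate UDF: `AccumulatorArgs` now carries
// a slice of argument types, so implementations read `input_types[0]` instead of a
// dedicated `input_type` field. The helper name below is illustrative only.
use arrow::datatypes::DataType;
use datafusion_expr::function::AccumulatorArgs;

fn first_input_type(acc_args: &AccumulatorArgs) -> DataType {
    // Assumes the aggregate declares at least one argument.
    acc_args.input_types[0].clone()
}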
-pub mod count_distinct; -pub mod groups_accumulator; -pub mod merge_arrays; -pub mod stats; -pub mod tdigest; -pub mod utils; +use std::fmt::Debug; +use std::{any::Any, sync::Arc}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + +use datafusion_common::exec_err; use datafusion_common::{internal_err, not_impl_err, DFSchema, Result}; use datafusion_expr::function::StateFieldsArgs; use datafusion_expr::type_coercion::aggregates::check_arg_count; +use datafusion_expr::utils::AggregateOrderSensitivity; use datafusion_expr::ReversedUDAF; use datafusion_expr::{ function::AccumulatorArgs, Accumulator, AggregateUDF, Expr, GroupsAccumulator, }; -use std::fmt::Debug; -use std::{any::Any, sync::Arc}; -use self::utils::down_cast_any_ref; use crate::physical_expr::PhysicalExpr; use crate::sort_expr::{LexOrdering, PhysicalSortExpr}; use crate::utils::reverse_order_bys; -use datafusion_common::exec_err; -use datafusion_expr::utils::AggregateOrderSensitivity; +use self::utils::down_cast_any_ref; + +pub mod count_distinct; +pub mod groups_accumulator; +pub mod merge_arrays; +pub mod stats; +pub mod tdigest; +pub mod utils; /// Creates a physical expression of the UDAF, that includes all necessary type coercion. /// This function errors when `args`' can't be coerced to a valid argument type of the UDAF. @@ -225,7 +227,7 @@ impl AggregateExprBuilder { ignore_nulls, ordering_fields, is_distinct, - input_type: input_exprs_types[0].clone(), + input_types: input_exprs_types, is_reversed, })) } @@ -466,7 +468,7 @@ pub struct AggregateFunctionExpr { ordering_fields: Vec, is_distinct: bool, is_reversed: bool, - input_type: DataType, + input_types: Vec, } impl AggregateFunctionExpr { @@ -504,7 +506,7 @@ impl AggregateExpr for AggregateFunctionExpr { fn state_fields(&self) -> Result> { let args = StateFieldsArgs { name: &self.name, - input_type: &self.input_type, + input_types: &self.input_types, return_type: &self.data_type, ordering_fields: &self.ordering_fields, is_distinct: self.is_distinct, @@ -525,7 +527,7 @@ impl AggregateExpr for AggregateFunctionExpr { ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, - input_type: &self.input_type, + input_types: &self.input_types, input_exprs: &self.logical_args, name: &self.name, is_reversed: self.is_reversed, @@ -542,7 +544,7 @@ impl AggregateExpr for AggregateFunctionExpr { ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, - input_type: &self.input_type, + input_types: &self.input_types, input_exprs: &self.logical_args, name: &self.name, is_reversed: self.is_reversed, @@ -614,7 +616,7 @@ impl AggregateExpr for AggregateFunctionExpr { ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, - input_type: &self.input_type, + input_types: &self.input_types, input_exprs: &self.logical_args, name: &self.name, is_reversed: self.is_reversed, @@ -630,7 +632,7 @@ impl AggregateExpr for AggregateFunctionExpr { ignore_nulls: self.ignore_nulls, sort_exprs: &self.sort_exprs, is_distinct: self.is_distinct, - input_type: &self.input_type, + input_types: &self.input_types, input_exprs: &self.logical_args, name: &self.name, is_reversed: self.is_reversed, From cd786e27519c0ba5f0d45c578a3587dce379f366 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Tue, 30 Jul 2024 18:58:21 -0500 Subject: [PATCH 172/357] fix: regr_count now returns Uint64 (#11731) Fixes https://github.com/apache/datafusion/issues/11726 --- 
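A minimal, standalone sketch (not part of the patch) of the user-visible effect: the `regr_count` column now comes back as an unsigned 64-bit integer rather than a float. It assumes a Tokio runtime and DataFusion's default function registry; the query and assertion are illustrative only.

```rust
use datafusion::arrow::datatypes::DataType;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    let df = ctx
        .sql("SELECT regr_count(column2, column1) FROM (VALUES (1, 2), (2, 4))")
        .await?;
    // The output field is now UInt64 instead of Float64.
    assert_eq!(df.schema().field(0).data_type(), &DataType::UInt64);
    df.show().await?;
    Ok(())
}
```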
datafusion/functions-aggregate/src/regr.rs | 8 +++-- .../sqllogictest/test_files/aggregate.slt | 32 +++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/datafusion/functions-aggregate/src/regr.rs b/datafusion/functions-aggregate/src/regr.rs index 8d04ae87157d4..aad110a13e136 100644 --- a/datafusion/functions-aggregate/src/regr.rs +++ b/datafusion/functions-aggregate/src/regr.rs @@ -153,7 +153,11 @@ impl AggregateUDFImpl for Regr { return plan_err!("Covariance requires numeric input types"); } - Ok(DataType::Float64) + if matches!(self.regr_type, RegrType::Count) { + Ok(DataType::UInt64) + } else { + Ok(DataType::Float64) + } } fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { @@ -480,7 +484,7 @@ impl Accumulator for RegrAccumulator { let nullif_cond = self.count <= 1 || var_pop_x == 0.0; nullif_or_stat(nullif_cond, self.mean_y - slope * self.mean_x) } - RegrType::Count => Ok(ScalarValue::Float64(Some(self.count as f64))), + RegrType::Count => Ok(ScalarValue::UInt64(Some(self.count))), RegrType::R2 => { // Only 0/1 point or all x(or y) is the same let nullif_cond = self.count <= 1 || var_pop_x == 0.0 || var_pop_y == 0.0; diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index fa228d499d1fc..5cc66bb493ac0 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4742,27 +4742,27 @@ select regr_sxy(NULL, 'bar'); # regr_*() NULL results -query RRRRRRRRR +query RRIRRRRRR select regr_slope(1,1), regr_intercept(1,1), regr_count(1,1), regr_r2(1,1), regr_avgx(1,1), regr_avgy(1,1), regr_sxx(1,1), regr_syy(1,1), regr_sxy(1,1); ---- NULL NULL 1 NULL 1 1 0 0 0 -query RRRRRRRRR +query RRIRRRRRR select regr_slope(1, NULL), regr_intercept(1, NULL), regr_count(1, NULL), regr_r2(1, NULL), regr_avgx(1, NULL), regr_avgy(1, NULL), regr_sxx(1, NULL), regr_syy(1, NULL), regr_sxy(1, NULL); ---- NULL NULL 0 NULL NULL NULL NULL NULL NULL -query RRRRRRRRR +query RRIRRRRRR select regr_slope(NULL, 1), regr_intercept(NULL, 1), regr_count(NULL, 1), regr_r2(NULL, 1), regr_avgx(NULL, 1), regr_avgy(NULL, 1), regr_sxx(NULL, 1), regr_syy(NULL, 1), regr_sxy(NULL, 1); ---- NULL NULL 0 NULL NULL NULL NULL NULL NULL -query RRRRRRRRR +query RRIRRRRRR select regr_slope(NULL, NULL), regr_intercept(NULL, NULL), regr_count(NULL, NULL), regr_r2(NULL, NULL), regr_avgx(NULL, NULL), regr_avgy(NULL, NULL), regr_sxx(NULL, NULL), regr_syy(NULL, NULL), regr_sxy(NULL, NULL); ---- NULL NULL 0 NULL NULL NULL NULL NULL NULL -query RRRRRRRRR +query RRIRRRRRR select regr_slope(column2, column1), regr_intercept(column2, column1), regr_count(column2, column1), regr_r2(column2, column1), regr_avgx(column2, column1), regr_avgy(column2, column1), regr_sxx(column2, column1), regr_syy(column2, column1), regr_sxy(column2, column1) from (values (1,2), (1,4), (1,6)); ---- NULL NULL 3 NULL 1 4 0 8 0 @@ -4770,7 +4770,7 @@ NULL NULL 3 NULL 1 4 0 8 0 # regr_*() basic tests -query RRRRRRRRR +query RRIRRRRRR select regr_slope(column2, column1), regr_intercept(column2, column1), @@ -4785,7 +4785,7 @@ from (values (1,2), (2,4), (3,6)); ---- 2 0 3 1 2 4 2 8 4 -query RRRRRRRRR +query RRIRRRRRR select regr_slope(c12, c11), regr_intercept(c12, c11), @@ -4803,7 +4803,7 @@ from aggregate_test_100; # regr_*() functions ignore NULLs -query RRRRRRRRR +query RRIRRRRRR select regr_slope(column2, column1), regr_intercept(column2, column1), @@ -4818,7 +4818,7 @@ from (values (1,NULL), (2,4), (3,6)); ---- 2 0 2 
1 2.5 5 0.5 2 1 -query RRRRRRRRR +query RRIRRRRRR select regr_slope(column2, column1), regr_intercept(column2, column1), @@ -4833,7 +4833,7 @@ from (values (1,NULL), (NULL,4), (3,6)); ---- NULL NULL 1 NULL 3 6 0 0 0 -query RRRRRRRRR +query RRIRRRRRR select regr_slope(column2, column1), regr_intercept(column2, column1), @@ -4848,7 +4848,7 @@ from (values (1,NULL), (NULL,4), (NULL,NULL)); ---- NULL NULL 0 NULL NULL NULL NULL NULL NULL -query TRRRRRRRRR rowsort +query TRRIRRRRRR rowsort select column3, regr_slope(column2, column1), @@ -4873,7 +4873,7 @@ c NULL NULL 1 NULL 1 10 0 0 0 statement ok set datafusion.execution.batch_size = 1; -query RRRRRRRRR +query RRIRRRRRR select regr_slope(c12, c11), regr_intercept(c12, c11), @@ -4891,7 +4891,7 @@ from aggregate_test_100; statement ok set datafusion.execution.batch_size = 2; -query RRRRRRRRR +query RRIRRRRRR select regr_slope(c12, c11), regr_intercept(c12, c11), @@ -4909,7 +4909,7 @@ from aggregate_test_100; statement ok set datafusion.execution.batch_size = 3; -query RRRRRRRRR +query RRIRRRRRR select regr_slope(c12, c11), regr_intercept(c12, c11), @@ -4930,7 +4930,7 @@ set datafusion.execution.batch_size = 8192; # regr_*() testing retract_batch() from RegrAccumulator's internal implementation -query RRRRRRRRR +query RRIRRRRRR SELECT regr_slope(column2, column1) OVER w AS slope, regr_intercept(column2, column1) OVER w AS intercept, @@ -4951,7 +4951,7 @@ NULL NULL 1 NULL 1 2 0 0 0 4.5 -7 3 0.964285714286 4 11 2 42 9 3 0 3 1 5 15 2 18 6 -query RRRRRRRRR +query RRIRRRRRR SELECT regr_slope(column2, column1) OVER w AS slope, regr_intercept(column2, column1) OVER w AS intercept, From 7ca7456d3aa657b0b11c0d4eaae86576ae37a37e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 31 Jul 2024 08:14:34 +0200 Subject: [PATCH 173/357] Handle nulls in approx_percentile_cont (#11721) * Respect nulls in approx_percentile_cont * Compile * Remove check * Adapt comment --- .../functions-aggregate/src/approx_percentile_cont.rs | 11 ++++++++--- datafusion/sqllogictest/test_files/aggregate.slt | 6 ++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 16837dc80748c..844e48f0a44dc 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -19,7 +19,8 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; use std::sync::Arc; -use arrow::array::RecordBatch; +use arrow::array::{Array, RecordBatch}; +use arrow::compute::{filter, is_not_null}; use arrow::{ array::{ ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, @@ -393,8 +394,12 @@ impl Accumulator for ApproxPercentileAccumulator { } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { - let values = &values[0]; - let sorted_values = &arrow::compute::sort(values, None)?; + // Remove any nulls before computing the percentile + let mut values = Arc::clone(&values[0]); + if values.nulls().is_some() { + values = filter(&values, &is_not_null(&values)?)?; + } + let sorted_values = &arrow::compute::sort(&values, None)?; let sorted_values = ApproxPercentileAccumulator::convert_to_float(sorted_values)?; self.digest = self.digest.merge_sorted_f64(&sorted_values); Ok(()) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 5cc66bb493ac0..6ec1e0c52690c 100644 --- 
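A small standalone sketch (not part of the patch) of the null-handling idea in `update_batch` above: null entries are removed with `is_not_null` + `filter` before the values are sorted and merged into the digest, so NULL inputs are simply ignored when computing the percentile. The helper name and sample values are illustrative only.

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::{filter, is_not_null};
use arrow::error::ArrowError;

// Drop null entries, mirroring the check added to `update_batch`.
fn drop_nulls(values: &ArrayRef) -> Result<ArrayRef, ArrowError> {
    if values.nulls().is_some() {
        filter(values, &is_not_null(values)?)
    } else {
        Ok(Arc::clone(values))
    }
}

fn main() -> Result<(), ArrowError> {
    let input: ArrayRef = Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)]));
    // Only the two non-null values remain.
    assert_eq!(drop_nulls(&input)?.len(), 2);
    Ok(())
}
```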
a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1237,6 +1237,12 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) < 0.05 ---- true +# percentile_cont_with_nulls +query I +SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v); +---- +2 + # csv_query_cube_avg query TIR SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2 From 8ac50e2dbbf0175aea2958c226fbbfd05a901ae2 Mon Sep 17 00:00:00 2001 From: JasonLi Date: Wed, 31 Jul 2024 18:28:22 +0800 Subject: [PATCH 174/357] Reduce repetition in try_process_group_by_unnest and try_process_unnest (#11714) * refactor: process unnest * pass clippy --- datafusion/sql/src/select.rs | 42 +++++++++++------------------------- datafusion/sql/src/utils.rs | 22 +++++++++++++++++++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 4de3952dc7ea1..95a44dace31a8 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -23,7 +23,7 @@ use crate::planner::{ }; use crate::utils::{ check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs, - resolve_columns, resolve_positions_to_exprs, transform_bottom_unnest, + resolve_columns, resolve_positions_to_exprs, transform_bottom_unnests, }; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; @@ -318,20 +318,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // - unnest(struct_col) will be transformed into unnest(struct_col).field1, unnest(struct_col).field2 // - unnest(array_col) will be transformed into unnest(array_col).element // - unnest(array_col) + 1 will be transformed into unnest(array_col).element +1 - let outer_projection_exprs: Vec = intermediate_select_exprs - .iter() - .map(|expr| { - transform_bottom_unnest( - &intermediate_plan, - &mut unnest_columns, - &mut inner_projection_exprs, - expr, - ) - }) - .collect::>>()? - .into_iter() - .flatten() - .collect(); + let outer_projection_exprs = transform_bottom_unnests( + &intermediate_plan, + &mut unnest_columns, + &mut inner_projection_exprs, + &intermediate_select_exprs, + )?; // No more unnest is possible if unnest_columns.is_empty() { @@ -417,20 +409,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut unnest_columns = vec![]; let mut inner_projection_exprs = vec![]; - let outer_projection_exprs: Vec = intermediate_select_exprs - .iter() - .map(|expr| { - transform_bottom_unnest( - &intermediate_plan, - &mut unnest_columns, - &mut inner_projection_exprs, - expr, - ) - }) - .collect::>>()? - .into_iter() - .flatten() - .collect(); + let outer_projection_exprs = transform_bottom_unnests( + &intermediate_plan, + &mut unnest_columns, + &mut inner_projection_exprs, + &intermediate_select_exprs, + )?; if unnest_columns.is_empty() { break; diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index a9a782902ac95..3b044646e6cb2 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -287,6 +287,28 @@ pub(crate) fn value_to_string(value: &Value) -> Option { } } +pub(crate) fn transform_bottom_unnests( + input: &LogicalPlan, + unnest_placeholder_columns: &mut Vec, + inner_projection_exprs: &mut Vec, + original_exprs: &[Expr], +) -> Result> { + Ok(original_exprs + .iter() + .map(|expr| { + transform_bottom_unnest( + input, + unnest_placeholder_columns, + inner_projection_exprs, + expr, + ) + }) + .collect::>>()? 
+ .into_iter() + .flatten() + .collect::>()) +} + /// The context is we want to rewrite unnest() into InnerProjection->Unnest->OuterProjection /// Given an expression which contains unnest expr as one of its children, /// Try transform depends on unnest type From cc6416e7413e4a016f4a9070a801a7d5d9212745 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 31 Jul 2024 06:29:55 -0400 Subject: [PATCH 175/357] Minor: Add example for `ScalarUDF::call` (#11727) --- datafusion/expr/src/udf.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 487b098ea201b..5ba6e3007ead4 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -46,6 +46,8 @@ use crate::{ /// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API /// access (examples in [`advanced_udf.rs`]). /// +/// See [`Self::call`] to invoke a `ScalarUDF` with arguments. +/// /// # API Note /// /// This is a separate struct from `ScalarUDFImpl` to maintain backwards @@ -121,7 +123,16 @@ impl ScalarUDF { /// Returns a [`Expr`] logical expression to call this UDF with specified /// arguments. /// - /// This utility allows using the UDF without requiring access to the registry. + /// This utility allows easily calling UDFs + /// + /// # Example + /// ```no_run + /// use datafusion_expr::{col, lit, ScalarUDF}; + /// # fn my_udf() -> ScalarUDF { unimplemented!() } + /// let my_func: ScalarUDF = my_udf(); + /// // Create an expr for `my_func(a, 12.3)` + /// let expr = my_func.call(vec![col("a"), lit(12.3)]); + /// ``` pub fn call(&self, args: Vec) -> Expr { Expr::ScalarFunction(crate::expr::ScalarFunction::new_udf( Arc::new(self.clone()), From 929568d600120b193795a6d968aeb86b064c07f4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 31 Jul 2024 07:33:45 -0400 Subject: [PATCH 176/357] Use `cargo release` in benchmarks (#11722) --- benchmarks/bench.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index efd56b17c7cb9..3b854f6dc1471 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -34,8 +34,7 @@ COMMAND= BENCHMARK=all DATAFUSION_DIR=${DATAFUSION_DIR:-$SCRIPT_DIR/..} DATA_DIR=${DATA_DIR:-$SCRIPT_DIR/data} -#CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --release"} -CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --profile release-nonlto"} # for faster iterations +CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --release"} PREFER_HASH_JOIN=${PREFER_HASH_JOIN:-true} VIRTUAL_ENV=${VIRTUAL_ENV:-$SCRIPT_DIR/venv} From 6508fa2dc41e27af5535b302b9ff26f15ca41eb1 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 31 Jul 2024 20:12:02 +0800 Subject: [PATCH 177/357] expose some fields on session state (#11716) * expose some fields on session state Signed-off-by: Ruihang Xia * add example Signed-off-by: Ruihang Xia * rename file Signed-off-by: Ruihang Xia * fix clippy Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- datafusion-examples/README.md | 1 + datafusion-examples/examples/planner_api.rs | 127 ++++++++++++++++++ .../core/src/execution/session_state.rs | 15 +++ 3 files changed, 143 insertions(+) create mode 100644 datafusion-examples/examples/planner_api.rs diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index da01f60b527d9..5f032c3e9cfff 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -73,6 +73,7 @@ cargo run --example dataframe - 
[`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution - [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`. - [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan` +- [`planner_api.rs](examples/planner_api.rs): APIs to manipulate logical and physical plans - [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics - [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 - [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP diff --git a/datafusion-examples/examples/planner_api.rs b/datafusion-examples/examples/planner_api.rs new file mode 100644 index 0000000000000..92b58bcee1974 --- /dev/null +++ b/datafusion-examples/examples/planner_api.rs @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::error::Result; +use datafusion::physical_plan::displayable; +use datafusion::prelude::*; +use datafusion_expr::{LogicalPlan, PlanType}; + +/// This example demonstrates the process of converting logical plan +/// into physical execution plans using DataFusion. +/// +/// Planning phase in DataFusion contains several steps: +/// 1. Analyzing and optimizing logical plan +/// 2. Converting logical plan into physical plan +/// +/// The code in this example shows two ways to convert a logical plan into +/// physical plan: +/// - Via the combined `create_physical_plan` API. +/// - Utilizing the analyzer, optimizer, and query planner APIs separately. +#[tokio::main] +async fn main() -> Result<()> { + // Set up a DataFusion context and load a Parquet file + let ctx = SessionContext::new(); + let testdata = datafusion::test_util::parquet_test_data(); + let df = ctx + .read_parquet( + &format!("{testdata}/alltypes_plain.parquet"), + ParquetReadOptions::default(), + ) + .await?; + + // Construct the input logical plan using DataFrame API + let df = df + .clone() + .select(vec![ + df.parse_sql_expr("int_col")?, + df.parse_sql_expr("double_col")?, + ])? + .filter(df.parse_sql_expr("int_col < 5 OR double_col = 8.0")?)? + .aggregate( + vec![df.parse_sql_expr("double_col")?], + vec![df.parse_sql_expr("SUM(int_col) as sum_int_col")?], + )? + .limit(0, Some(1))?; + let logical_plan = df.logical_plan().clone(); + + to_physical_plan_in_one_api_demo(&logical_plan, &ctx).await?; + + to_physical_plan_step_by_step_demo(logical_plan, &ctx).await?; + + Ok(()) +} + +/// Converts a logical plan into a physical plan using the combined +/// `create_physical_plan` API. 
It will first optimize the logical +/// plan and then convert it into physical plan. +async fn to_physical_plan_in_one_api_demo( + input: &LogicalPlan, + ctx: &SessionContext, +) -> Result<()> { + let physical_plan = ctx.state().create_physical_plan(input).await?; + + println!( + "Physical plan direct from logical plan:\n\n{}\n\n", + displayable(physical_plan.as_ref()) + .to_stringified(false, PlanType::InitialPhysicalPlan) + .plan + ); + + Ok(()) +} + +/// Converts a logical plan into a physical plan by utilizing the analyzer, +/// optimizer, and query planner APIs separately. This flavor gives more +/// control over the planning process. +async fn to_physical_plan_step_by_step_demo( + input: LogicalPlan, + ctx: &SessionContext, +) -> Result<()> { + // First analyze the logical plan + let analyzed_logical_plan = ctx.state().analyzer().execute_and_check( + input, + ctx.state().config_options(), + |_, _| (), + )?; + println!("Analyzed logical plan:\n\n{:?}\n\n", analyzed_logical_plan); + + // Optimize the analyzed logical plan + let optimized_logical_plan = ctx.state().optimizer().optimize( + analyzed_logical_plan, + &ctx.state(), + |_, _| (), + )?; + println!( + "Optimized logical plan:\n\n{:?}\n\n", + optimized_logical_plan + ); + + // Create the physical plan + let physical_plan = ctx + .state() + .query_planner() + .create_physical_plan(&optimized_logical_plan, &ctx.state()) + .await?; + println!( + "Final physical plan:\n\n{}\n\n", + displayable(physical_plan.as_ref()) + .to_stringified(false, PlanType::InitialPhysicalPlan) + .plan + ); + + Ok(()) +} diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index ea7a65cc308fb..ccad0240fddbc 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -627,6 +627,21 @@ impl SessionState { query.sql_to_expr(sql_expr, df_schema, &mut PlannerContext::new()) } + /// Returns the [`Analyzer`] for this session + pub fn analyzer(&self) -> &Analyzer { + &self.analyzer + } + + /// Returns the [`Optimizer`] for this session + pub fn optimizer(&self) -> &Optimizer { + &self.optimizer + } + + /// Returns the [`QueryPlanner`] for this session + pub fn query_planner(&self) -> &Arc { + &self.query_planner + } + /// Optimizes the logical plan by applying optimizer rules. pub fn optimize(&self, plan: &LogicalPlan) -> datafusion_common::Result { if let LogicalPlan::Explain(e) = plan { From abeb8b4f8d46b1852102c1da03aeb1d6fba6377f Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 31 Jul 2024 07:31:45 -0500 Subject: [PATCH 178/357] Make DefaultSchemaAdapterFactory public (#11709) This seems like a nice self-contained API that would be useful for downstream consumers of DataFusion --- datafusion/core/src/datasource/schema_adapter.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/schema_adapter.rs b/datafusion/core/src/datasource/schema_adapter.rs index f485c49e91097..40cb40a83af23 100644 --- a/datafusion/core/src/datasource/schema_adapter.rs +++ b/datafusion/core/src/datasource/schema_adapter.rs @@ -92,8 +92,10 @@ pub trait SchemaMapper: Debug + Send + Sync { ) -> datafusion_common::Result; } +/// Basic implementation of [`SchemaAdapterFactory`] that maps columns by name +/// and casts columns to the expected type. 
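// A brief sketch (not part of the patch) of what making this factory public enables for
// downstream crates: constructing the default name-matching, type-casting adapter
// directly. It assumes the type stays exported at
// `datafusion::datasource::schema_adapter` (the module shown above); the schema and
// function name are illustrative only.
use std::sync::Arc;

use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapterFactory};

fn build_default_adapter() {
    let table_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int64, true)]));
    // `create` is the `SchemaAdapterFactory` trait method shown above.
    let _adapter = DefaultSchemaAdapterFactory::default().create(table_schema);
}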
#[derive(Clone, Debug, Default)] -pub(crate) struct DefaultSchemaAdapterFactory {} +pub struct DefaultSchemaAdapterFactory {} impl SchemaAdapterFactory for DefaultSchemaAdapterFactory { fn create(&self, table_schema: SchemaRef) -> Box { From 89677ae66ec2238ffd58de8c36fe6030fad458fa Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 31 Jul 2024 20:31:59 +0800 Subject: [PATCH 179/357] Check hashes first during probing the aggr hash table (#11718) * check hashes first during probing. * fix style. --------- Co-authored-by: xikai.wxk --- .../physical-plan/src/aggregates/group_values/row.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 8c2a4ba5c4977..9f05da7cff533 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -120,12 +120,13 @@ impl GroupValues for GroupValuesRows { batch_hashes.resize(n_rows, 0); create_hashes(cols, &self.random_state, batch_hashes)?; - for (row, &hash) in batch_hashes.iter().enumerate() { - let entry = self.map.get_mut(hash, |(_hash, group_idx)| { + for (row, &target_hash) in batch_hashes.iter().enumerate() { + let entry = self.map.get_mut(target_hash, |(exist_hash, group_idx)| { // verify that a group that we are inserting with hash is // actually the same key value as the group in // existing_idx (aka group_values @ row) - group_rows.row(row) == group_values.row(*group_idx) + target_hash == *exist_hash + && group_rows.row(row) == group_values.row(*group_idx) }); let group_idx = match entry { @@ -139,7 +140,7 @@ impl GroupValues for GroupValuesRows { // for hasher function, use precomputed hash value self.map.insert_accounted( - (hash, group_idx), + (target_hash, group_idx), |(hash, _group_index)| *hash, &mut self.map_size, ); From 2887491fc63c5084abf7eef644618b57172480bf Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Thu, 1 Aug 2024 00:20:50 +0800 Subject: [PATCH 180/357] fix: set `null_equals_null` to false when `convert_cross_join_to_inner_join` (#11738) --- datafusion/optimizer/src/push_down_filter.rs | 2 +- datafusion/sqllogictest/test_files/join.slt | 24 +++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index ecad3404d5e03..f9c9ec961c8e1 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1127,7 +1127,7 @@ fn convert_cross_join_to_inner_join(cross_join: CrossJoin) -> Result { on: vec![], filter: None, schema: DFSchemaRef::new(join_schema), - null_equals_null: true, + null_equals_null: false, }) } diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 84aeb3ebd7661..29f1c4384daff 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1108,4 +1108,26 @@ statement ok DROP TABLE t0; statement ok -DROP TABLE t1; \ No newline at end of file +DROP TABLE t1; + +# Test SQLancer issue: https://github.com/apache/datafusion/issues/11704 +query II +WITH + t1 AS (SELECT NULL::int AS a), + t2 AS (SELECT NULL::int AS a) +SELECT * FROM + (SELECT * FROM t1 CROSS JOIN t2) +WHERE t1.a == t2.a + AND t1.a + t2.a IS NULL; +---- + +# Similar to above test case, but without the equality predicate +query II +WITH + t1 AS (SELECT NULL::int AS a), + 
t2 AS (SELECT NULL::int AS a) +SELECT * FROM + (SELECT * FROM t1 CROSS JOIN t2) +WHERE t1.a + t2.a IS NULL; +---- +NULL NULL \ No newline at end of file From fa50636c6aec0f321212ed8678c77de25b71a8f9 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:23:14 -0700 Subject: [PATCH 181/357] Implement physical plan serialization for parquet Copy plans (#11735) * Implement physical plan serialization for parquet Copy plans * fix clippy --- .../src/datasource/file_format/parquet.rs | 3 +- .../proto/datafusion_common.proto | 1 + datafusion/proto-common/src/from_proto/mod.rs | 22 +- .../proto-common/src/generated/pbjson.rs | 900 +++++++++--------- .../proto-common/src/generated/prost.rs | 53 +- datafusion/proto-common/src/to_proto/mod.rs | 29 +- .../src/generated/datafusion_proto_common.rs | 5 + .../proto/src/logical_plan/file_formats.rs | 264 ++++- .../tests/cases/roundtrip_logical_plan.rs | 82 ++ 9 files changed, 867 insertions(+), 492 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 8a1cd2a147c73..25956665d56c4 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -93,7 +93,8 @@ const BUFFER_FLUSH_BYTES: usize = 1024000; #[derive(Default)] /// Factory struct used to create [ParquetFormat] pub struct ParquetFormatFactory { - options: Option, + /// inner options for parquet + pub options: Option, } impl ParquetFormatFactory { diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index c59aaa2d42bbe..9268ccca0b70e 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -434,6 +434,7 @@ message JsonOptions { message TableParquetOptions { ParquetOptions global = 1; repeated ParquetColumnSpecificOptions column_specific_options = 2; + map key_value_metadata = 3; } message ParquetColumnSpecificOptions { diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 45703d8b9fed4..3487f43ae24e9 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -961,48 +961,48 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { } } -impl TryFrom<&protobuf::ColumnOptions> for ParquetColumnOptions { +impl TryFrom<&protobuf::ParquetColumnOptions> for ParquetColumnOptions { type Error = DataFusionError; fn try_from( - value: &protobuf::ColumnOptions, + value: &protobuf::ParquetColumnOptions, ) -> datafusion_common::Result { Ok(ParquetColumnOptions { compression: value.compression_opt.clone().map(|opt| match opt { - protobuf::column_options::CompressionOpt::Compression(v) => Some(v), + protobuf::parquet_column_options::CompressionOpt::Compression(v) => Some(v), }).unwrap_or(None), - dictionary_enabled: value.dictionary_enabled_opt.as_ref().map(|protobuf::column_options::DictionaryEnabledOpt::DictionaryEnabled(v)| *v), + dictionary_enabled: value.dictionary_enabled_opt.as_ref().map(|protobuf::parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled(v)| *v), statistics_enabled: value .statistics_enabled_opt.clone() .map(|opt| match opt { - protobuf::column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), + protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), }) .unwrap_or(None), 
max_statistics_size: value .max_statistics_size_opt.clone() .map(|opt| match opt { - protobuf::column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize), + protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize), }) .unwrap_or(None), encoding: value .encoding_opt.clone() .map(|opt| match opt { - protobuf::column_options::EncodingOpt::Encoding(v) => Some(v), + protobuf::parquet_column_options::EncodingOpt::Encoding(v) => Some(v), }) .unwrap_or(None), bloom_filter_enabled: value.bloom_filter_enabled_opt.clone().map(|opt| match opt { - protobuf::column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => Some(v), + protobuf::parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => Some(v), }) .unwrap_or(None), bloom_filter_fpp: value .bloom_filter_fpp_opt.clone() .map(|opt| match opt { - protobuf::column_options::BloomFilterFppOpt::BloomFilterFpp(v) => Some(v), + protobuf::parquet_column_options::BloomFilterFppOpt::BloomFilterFpp(v) => Some(v), }) .unwrap_or(None), bloom_filter_ndv: value .bloom_filter_ndv_opt.clone() .map(|opt| match opt { - protobuf::column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => Some(v), + protobuf::parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => Some(v), }) .unwrap_or(None), }) @@ -1016,7 +1016,7 @@ impl TryFrom<&protobuf::TableParquetOptions> for TableParquetOptions { ) -> datafusion_common::Result { let mut column_specific_options: HashMap = HashMap::new(); - for protobuf::ColumnSpecificOptions { + for protobuf::ParquetColumnSpecificOptions { column_name, options: maybe_options, } in &value.column_specific_options diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 23dd5746929d8..05e57f5585a6a 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -880,347 +880,7 @@ impl<'de> serde::Deserialize<'de> for Column { deserializer.deserialize_struct("datafusion_common.Column", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for ColumnOptions { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.bloom_filter_enabled_opt.is_some() { - len += 1; - } - if self.encoding_opt.is_some() { - len += 1; - } - if self.dictionary_enabled_opt.is_some() { - len += 1; - } - if self.compression_opt.is_some() { - len += 1; - } - if self.statistics_enabled_opt.is_some() { - len += 1; - } - if self.bloom_filter_fpp_opt.is_some() { - len += 1; - } - if self.bloom_filter_ndv_opt.is_some() { - len += 1; - } - if self.max_statistics_size_opt.is_some() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion_common.ColumnOptions", len)?; - if let Some(v) = self.bloom_filter_enabled_opt.as_ref() { - match v { - column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => { - struct_ser.serialize_field("bloomFilterEnabled", v)?; - } - } - } - if let Some(v) = self.encoding_opt.as_ref() { - match v { - column_options::EncodingOpt::Encoding(v) => { - struct_ser.serialize_field("encoding", v)?; - } - } - } - if let Some(v) = self.dictionary_enabled_opt.as_ref() { - match v { - column_options::DictionaryEnabledOpt::DictionaryEnabled(v) => { - struct_ser.serialize_field("dictionaryEnabled", v)?; - } - } - } - if let Some(v) = self.compression_opt.as_ref() { - match v { - column_options::CompressionOpt::Compression(v) 
=> { - struct_ser.serialize_field("compression", v)?; - } - } - } - if let Some(v) = self.statistics_enabled_opt.as_ref() { - match v { - column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => { - struct_ser.serialize_field("statisticsEnabled", v)?; - } - } - } - if let Some(v) = self.bloom_filter_fpp_opt.as_ref() { - match v { - column_options::BloomFilterFppOpt::BloomFilterFpp(v) => { - struct_ser.serialize_field("bloomFilterFpp", v)?; - } - } - } - if let Some(v) = self.bloom_filter_ndv_opt.as_ref() { - match v { - column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("bloomFilterNdv", ToString::to_string(&v).as_str())?; - } - } - } - if let Some(v) = self.max_statistics_size_opt.as_ref() { - match v { - column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { - struct_ser.serialize_field("maxStatisticsSize", v)?; - } - } - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for ColumnOptions { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "bloom_filter_enabled", - "bloomFilterEnabled", - "encoding", - "dictionary_enabled", - "dictionaryEnabled", - "compression", - "statistics_enabled", - "statisticsEnabled", - "bloom_filter_fpp", - "bloomFilterFpp", - "bloom_filter_ndv", - "bloomFilterNdv", - "max_statistics_size", - "maxStatisticsSize", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - BloomFilterEnabled, - Encoding, - DictionaryEnabled, - Compression, - StatisticsEnabled, - BloomFilterFpp, - BloomFilterNdv, - MaxStatisticsSize, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "bloomFilterEnabled" | "bloom_filter_enabled" => Ok(GeneratedField::BloomFilterEnabled), - "encoding" => Ok(GeneratedField::Encoding), - "dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled), - "compression" => Ok(GeneratedField::Compression), - "statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled), - "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), - "bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv), - "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ColumnOptions; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.ColumnOptions") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut bloom_filter_enabled_opt__ = None; - let mut encoding_opt__ = None; - let mut dictionary_enabled_opt__ = None; - let mut compression_opt__ = None; - let mut 
statistics_enabled_opt__ = None; - let mut bloom_filter_fpp_opt__ = None; - let mut bloom_filter_ndv_opt__ = None; - let mut max_statistics_size_opt__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::BloomFilterEnabled => { - if bloom_filter_enabled_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("bloomFilterEnabled")); - } - bloom_filter_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::BloomFilterEnabledOpt::BloomFilterEnabled); - } - GeneratedField::Encoding => { - if encoding_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("encoding")); - } - encoding_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::EncodingOpt::Encoding); - } - GeneratedField::DictionaryEnabled => { - if dictionary_enabled_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("dictionaryEnabled")); - } - dictionary_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::DictionaryEnabledOpt::DictionaryEnabled); - } - GeneratedField::Compression => { - if compression_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("compression")); - } - compression_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::CompressionOpt::Compression); - } - GeneratedField::StatisticsEnabled => { - if statistics_enabled_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("statisticsEnabled")); - } - statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::StatisticsEnabledOpt::StatisticsEnabled); - } - GeneratedField::BloomFilterFpp => { - if bloom_filter_fpp_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("bloomFilterFpp")); - } - bloom_filter_fpp_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| column_options::BloomFilterFppOpt::BloomFilterFpp(x.0)); - } - GeneratedField::BloomFilterNdv => { - if bloom_filter_ndv_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("bloomFilterNdv")); - } - bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0)); - } - GeneratedField::MaxStatisticsSize => { - if max_statistics_size_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); - } - max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); - } - } - } - Ok(ColumnOptions { - bloom_filter_enabled_opt: bloom_filter_enabled_opt__, - encoding_opt: encoding_opt__, - dictionary_enabled_opt: dictionary_enabled_opt__, - compression_opt: compression_opt__, - statistics_enabled_opt: statistics_enabled_opt__, - bloom_filter_fpp_opt: bloom_filter_fpp_opt__, - bloom_filter_ndv_opt: bloom_filter_ndv_opt__, - max_statistics_size_opt: max_statistics_size_opt__, - }) - } - } - deserializer.deserialize_struct("datafusion_common.ColumnOptions", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for ColumnRelation { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if !self.relation.is_empty() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion_common.ColumnRelation", len)?; - if !self.relation.is_empty() { - 
struct_ser.serialize_field("relation", &self.relation)?; - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for ColumnRelation { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "relation", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - Relation, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "relation" => Ok(GeneratedField::Relation), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ColumnRelation; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.ColumnRelation") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut relation__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::Relation => { - if relation__.is_some() { - return Err(serde::de::Error::duplicate_field("relation")); - } - relation__ = Some(map_.next_value()?); - } - } - } - Ok(ColumnRelation { - relation: relation__.unwrap_or_default(), - }) - } - } - deserializer.deserialize_struct("datafusion_common.ColumnRelation", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for ColumnSpecificOptions { +impl serde::Serialize for ColumnRelation { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -1228,38 +888,29 @@ impl serde::Serialize for ColumnSpecificOptions { { use serde::ser::SerializeStruct; let mut len = 0; - if !self.column_name.is_empty() { - len += 1; - } - if self.options.is_some() { + if !self.relation.is_empty() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion_common.ColumnSpecificOptions", len)?; - if !self.column_name.is_empty() { - struct_ser.serialize_field("columnName", &self.column_name)?; - } - if let Some(v) = self.options.as_ref() { - struct_ser.serialize_field("options", v)?; + let mut struct_ser = serializer.serialize_struct("datafusion_common.ColumnRelation", len)?; + if !self.relation.is_empty() { + struct_ser.serialize_field("relation", &self.relation)?; } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for ColumnSpecificOptions { +impl<'de> serde::Deserialize<'de> for ColumnRelation { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "column_name", - "columnName", - "options", + "relation", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - ColumnName, - Options, + Relation, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -1281,8 +932,7 @@ impl<'de> serde::Deserialize<'de> for ColumnSpecificOptions { E: serde::de::Error, { match value { - "columnName" 
| "column_name" => Ok(GeneratedField::ColumnName), - "options" => Ok(GeneratedField::Options), + "relation" => Ok(GeneratedField::Relation), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -1292,41 +942,33 @@ impl<'de> serde::Deserialize<'de> for ColumnSpecificOptions { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ColumnSpecificOptions; + type Value = ColumnRelation; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.ColumnSpecificOptions") + formatter.write_str("struct datafusion_common.ColumnRelation") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - let mut column_name__ = None; - let mut options__ = None; + let mut relation__ = None; while let Some(k) = map_.next_key()? { match k { - GeneratedField::ColumnName => { - if column_name__.is_some() { - return Err(serde::de::Error::duplicate_field("columnName")); - } - column_name__ = Some(map_.next_value()?); - } - GeneratedField::Options => { - if options__.is_some() { - return Err(serde::de::Error::duplicate_field("options")); + GeneratedField::Relation => { + if relation__.is_some() { + return Err(serde::de::Error::duplicate_field("relation")); } - options__ = map_.next_value()?; + relation__ = Some(map_.next_value()?); } } } - Ok(ColumnSpecificOptions { - column_name: column_name__.unwrap_or_default(), - options: options__, + Ok(ColumnRelation { + relation: relation__.unwrap_or_default(), }) } } - deserializer.deserialize_struct("datafusion_common.ColumnSpecificOptions", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion_common.ColumnRelation", FIELDS, GeneratedVisitor) } } impl serde::Serialize for ColumnStats { @@ -4448,30 +4090,231 @@ impl serde::Serialize for List { { use serde::ser::SerializeStruct; let mut len = 0; - if self.field_type.is_some() { + if self.field_type.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion_common.List", len)?; + if let Some(v) = self.field_type.as_ref() { + struct_ser.serialize_field("fieldType", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for List { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "field_type", + "fieldType", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + FieldType, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "fieldType" | "field_type" => Ok(GeneratedField::FieldType), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = List; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
formatter.write_str("struct datafusion_common.List") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut field_type__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::FieldType => { + if field_type__.is_some() { + return Err(serde::de::Error::duplicate_field("fieldType")); + } + field_type__ = map_.next_value()?; + } + } + } + Ok(List { + field_type: field_type__, + }) + } + } + deserializer.deserialize_struct("datafusion_common.List", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for Map { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.field_type.is_some() { + len += 1; + } + if self.keys_sorted { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion_common.Map", len)?; + if let Some(v) = self.field_type.as_ref() { + struct_ser.serialize_field("fieldType", v)?; + } + if self.keys_sorted { + struct_ser.serialize_field("keysSorted", &self.keys_sorted)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for Map { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "field_type", + "fieldType", + "keys_sorted", + "keysSorted", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + FieldType, + KeysSorted, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "fieldType" | "field_type" => Ok(GeneratedField::FieldType), + "keysSorted" | "keys_sorted" => Ok(GeneratedField::KeysSorted), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = Map; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion_common.Map") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut field_type__ = None; + let mut keys_sorted__ = None; + while let Some(k) = map_.next_key()? 
{ + match k { + GeneratedField::FieldType => { + if field_type__.is_some() { + return Err(serde::de::Error::duplicate_field("fieldType")); + } + field_type__ = map_.next_value()?; + } + GeneratedField::KeysSorted => { + if keys_sorted__.is_some() { + return Err(serde::de::Error::duplicate_field("keysSorted")); + } + keys_sorted__ = Some(map_.next_value()?); + } + } + } + Ok(Map { + field_type: field_type__, + keys_sorted: keys_sorted__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion_common.Map", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for NdJsonFormat { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.options.is_some() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion_common.List", len)?; - if let Some(v) = self.field_type.as_ref() { - struct_ser.serialize_field("fieldType", v)?; + let mut struct_ser = serializer.serialize_struct("datafusion_common.NdJsonFormat", len)?; + if let Some(v) = self.options.as_ref() { + struct_ser.serialize_field("options", v)?; } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for List { +impl<'de> serde::Deserialize<'de> for NdJsonFormat { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "field_type", - "fieldType", + "options", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - FieldType, + Options, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -4493,7 +4336,7 @@ impl<'de> serde::Deserialize<'de> for List { E: serde::de::Error, { match value { - "fieldType" | "field_type" => Ok(GeneratedField::FieldType), + "options" => Ok(GeneratedField::Options), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -4503,36 +4346,36 @@ impl<'de> serde::Deserialize<'de> for List { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = List; + type Value = NdJsonFormat; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.List") + formatter.write_str("struct datafusion_common.NdJsonFormat") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - let mut field_type__ = None; + let mut options__ = None; while let Some(k) = map_.next_key()? 
{ match k { - GeneratedField::FieldType => { - if field_type__.is_some() { - return Err(serde::de::Error::duplicate_field("fieldType")); + GeneratedField::Options => { + if options__.is_some() { + return Err(serde::de::Error::duplicate_field("options")); } - field_type__ = map_.next_value()?; + options__ = map_.next_value()?; } } } - Ok(List { - field_type: field_type__, + Ok(NdJsonFormat { + options: options__, }) } } - deserializer.deserialize_struct("datafusion_common.List", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion_common.NdJsonFormat", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for Map { +impl serde::Serialize for ParquetColumnOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -4540,39 +4383,124 @@ impl serde::Serialize for Map { { use serde::ser::SerializeStruct; let mut len = 0; - if self.field_type.is_some() { + if self.bloom_filter_enabled_opt.is_some() { len += 1; } - if self.keys_sorted { + if self.encoding_opt.is_some() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion_common.Map", len)?; - if let Some(v) = self.field_type.as_ref() { - struct_ser.serialize_field("fieldType", v)?; + if self.dictionary_enabled_opt.is_some() { + len += 1; } - if self.keys_sorted { - struct_ser.serialize_field("keysSorted", &self.keys_sorted)?; + if self.compression_opt.is_some() { + len += 1; + } + if self.statistics_enabled_opt.is_some() { + len += 1; + } + if self.bloom_filter_fpp_opt.is_some() { + len += 1; + } + if self.bloom_filter_ndv_opt.is_some() { + len += 1; + } + if self.max_statistics_size_opt.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetColumnOptions", len)?; + if let Some(v) = self.bloom_filter_enabled_opt.as_ref() { + match v { + parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => { + struct_ser.serialize_field("bloomFilterEnabled", v)?; + } + } + } + if let Some(v) = self.encoding_opt.as_ref() { + match v { + parquet_column_options::EncodingOpt::Encoding(v) => { + struct_ser.serialize_field("encoding", v)?; + } + } + } + if let Some(v) = self.dictionary_enabled_opt.as_ref() { + match v { + parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled(v) => { + struct_ser.serialize_field("dictionaryEnabled", v)?; + } + } + } + if let Some(v) = self.compression_opt.as_ref() { + match v { + parquet_column_options::CompressionOpt::Compression(v) => { + struct_ser.serialize_field("compression", v)?; + } + } + } + if let Some(v) = self.statistics_enabled_opt.as_ref() { + match v { + parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => { + struct_ser.serialize_field("statisticsEnabled", v)?; + } + } + } + if let Some(v) = self.bloom_filter_fpp_opt.as_ref() { + match v { + parquet_column_options::BloomFilterFppOpt::BloomFilterFpp(v) => { + struct_ser.serialize_field("bloomFilterFpp", v)?; + } + } + } + if let Some(v) = self.bloom_filter_ndv_opt.as_ref() { + match v { + parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("bloomFilterNdv", ToString::to_string(&v).as_str())?; + } + } + } + if let Some(v) = self.max_statistics_size_opt.as_ref() { + match v { + parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { + struct_ser.serialize_field("maxStatisticsSize", v)?; + } + } } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for Map { +impl<'de> serde::Deserialize<'de> for 
ParquetColumnOptions { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "field_type", - "fieldType", - "keys_sorted", - "keysSorted", + "bloom_filter_enabled", + "bloomFilterEnabled", + "encoding", + "dictionary_enabled", + "dictionaryEnabled", + "compression", + "statistics_enabled", + "statisticsEnabled", + "bloom_filter_fpp", + "bloomFilterFpp", + "bloom_filter_ndv", + "bloomFilterNdv", + "max_statistics_size", + "maxStatisticsSize", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - FieldType, - KeysSorted, + BloomFilterEnabled, + Encoding, + DictionaryEnabled, + Compression, + StatisticsEnabled, + BloomFilterFpp, + BloomFilterNdv, + MaxStatisticsSize, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -4594,8 +4522,14 @@ impl<'de> serde::Deserialize<'de> for Map { E: serde::de::Error, { match value { - "fieldType" | "field_type" => Ok(GeneratedField::FieldType), - "keysSorted" | "keys_sorted" => Ok(GeneratedField::KeysSorted), + "bloomFilterEnabled" | "bloom_filter_enabled" => Ok(GeneratedField::BloomFilterEnabled), + "encoding" => Ok(GeneratedField::Encoding), + "dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled), + "compression" => Ok(GeneratedField::Compression), + "statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled), + "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), + "bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv), + "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -4605,44 +4539,92 @@ impl<'de> serde::Deserialize<'de> for Map { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = Map; + type Value = ParquetColumnOptions; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.Map") + formatter.write_str("struct datafusion_common.ParquetColumnOptions") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - let mut field_type__ = None; - let mut keys_sorted__ = None; + let mut bloom_filter_enabled_opt__ = None; + let mut encoding_opt__ = None; + let mut dictionary_enabled_opt__ = None; + let mut compression_opt__ = None; + let mut statistics_enabled_opt__ = None; + let mut bloom_filter_fpp_opt__ = None; + let mut bloom_filter_ndv_opt__ = None; + let mut max_statistics_size_opt__ = None; while let Some(k) = map_.next_key()? 
{ match k { - GeneratedField::FieldType => { - if field_type__.is_some() { - return Err(serde::de::Error::duplicate_field("fieldType")); + GeneratedField::BloomFilterEnabled => { + if bloom_filter_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterEnabled")); } - field_type__ = map_.next_value()?; + bloom_filter_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled); } - GeneratedField::KeysSorted => { - if keys_sorted__.is_some() { - return Err(serde::de::Error::duplicate_field("keysSorted")); + GeneratedField::Encoding => { + if encoding_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("encoding")); } - keys_sorted__ = Some(map_.next_value()?); + encoding_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_column_options::EncodingOpt::Encoding); + } + GeneratedField::DictionaryEnabled => { + if dictionary_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("dictionaryEnabled")); + } + dictionary_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled); + } + GeneratedField::Compression => { + if compression_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("compression")); + } + compression_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_column_options::CompressionOpt::Compression); + } + GeneratedField::StatisticsEnabled => { + if statistics_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("statisticsEnabled")); + } + statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled); + } + GeneratedField::BloomFilterFpp => { + if bloom_filter_fpp_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterFpp")); + } + bloom_filter_fpp_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::BloomFilterFppOpt::BloomFilterFpp(x.0)); + } + GeneratedField::BloomFilterNdv => { + if bloom_filter_ndv_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterNdv")); + } + bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0)); + } + GeneratedField::MaxStatisticsSize => { + if max_statistics_size_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); + } + max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); } } } - Ok(Map { - field_type: field_type__, - keys_sorted: keys_sorted__.unwrap_or_default(), + Ok(ParquetColumnOptions { + bloom_filter_enabled_opt: bloom_filter_enabled_opt__, + encoding_opt: encoding_opt__, + dictionary_enabled_opt: dictionary_enabled_opt__, + compression_opt: compression_opt__, + statistics_enabled_opt: statistics_enabled_opt__, + bloom_filter_fpp_opt: bloom_filter_fpp_opt__, + bloom_filter_ndv_opt: bloom_filter_ndv_opt__, + max_statistics_size_opt: max_statistics_size_opt__, }) } } - deserializer.deserialize_struct("datafusion_common.Map", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion_common.ParquetColumnOptions", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for 
NdJsonFormat { +impl serde::Serialize for ParquetColumnSpecificOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -4650,28 +4632,37 @@ impl serde::Serialize for NdJsonFormat { { use serde::ser::SerializeStruct; let mut len = 0; + if !self.column_name.is_empty() { + len += 1; + } if self.options.is_some() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion_common.NdJsonFormat", len)?; + let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetColumnSpecificOptions", len)?; + if !self.column_name.is_empty() { + struct_ser.serialize_field("columnName", &self.column_name)?; + } if let Some(v) = self.options.as_ref() { struct_ser.serialize_field("options", v)?; } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for NdJsonFormat { +impl<'de> serde::Deserialize<'de> for ParquetColumnSpecificOptions { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ + "column_name", + "columnName", "options", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { + ColumnName, Options, } impl<'de> serde::Deserialize<'de> for GeneratedField { @@ -4694,6 +4685,7 @@ impl<'de> serde::Deserialize<'de> for NdJsonFormat { E: serde::de::Error, { match value { + "columnName" | "column_name" => Ok(GeneratedField::ColumnName), "options" => Ok(GeneratedField::Options), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } @@ -4704,19 +4696,26 @@ impl<'de> serde::Deserialize<'de> for NdJsonFormat { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = NdJsonFormat; + type Value = ParquetColumnSpecificOptions; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion_common.NdJsonFormat") + formatter.write_str("struct datafusion_common.ParquetColumnSpecificOptions") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { + let mut column_name__ = None; let mut options__ = None; while let Some(k) = map_.next_key()? 
{ match k { + GeneratedField::ColumnName => { + if column_name__.is_some() { + return Err(serde::de::Error::duplicate_field("columnName")); + } + column_name__ = Some(map_.next_value()?); + } GeneratedField::Options => { if options__.is_some() { return Err(serde::de::Error::duplicate_field("options")); @@ -4725,12 +4724,13 @@ impl<'de> serde::Deserialize<'de> for NdJsonFormat { } } } - Ok(NdJsonFormat { + Ok(ParquetColumnSpecificOptions { + column_name: column_name__.unwrap_or_default(), options: options__, }) } } - deserializer.deserialize_struct("datafusion_common.NdJsonFormat", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion_common.ParquetColumnSpecificOptions", FIELDS, GeneratedVisitor) } } impl serde::Serialize for ParquetFormat { @@ -7558,6 +7558,9 @@ impl serde::Serialize for TableParquetOptions { if !self.column_specific_options.is_empty() { len += 1; } + if !self.key_value_metadata.is_empty() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion_common.TableParquetOptions", len)?; if let Some(v) = self.global.as_ref() { struct_ser.serialize_field("global", v)?; @@ -7565,6 +7568,9 @@ impl serde::Serialize for TableParquetOptions { if !self.column_specific_options.is_empty() { struct_ser.serialize_field("columnSpecificOptions", &self.column_specific_options)?; } + if !self.key_value_metadata.is_empty() { + struct_ser.serialize_field("keyValueMetadata", &self.key_value_metadata)?; + } struct_ser.end() } } @@ -7578,12 +7584,15 @@ impl<'de> serde::Deserialize<'de> for TableParquetOptions { "global", "column_specific_options", "columnSpecificOptions", + "key_value_metadata", + "keyValueMetadata", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Global, ColumnSpecificOptions, + KeyValueMetadata, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -7607,6 +7616,7 @@ impl<'de> serde::Deserialize<'de> for TableParquetOptions { match value { "global" => Ok(GeneratedField::Global), "columnSpecificOptions" | "column_specific_options" => Ok(GeneratedField::ColumnSpecificOptions), + "keyValueMetadata" | "key_value_metadata" => Ok(GeneratedField::KeyValueMetadata), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -7628,6 +7638,7 @@ impl<'de> serde::Deserialize<'de> for TableParquetOptions { { let mut global__ = None; let mut column_specific_options__ = None; + let mut key_value_metadata__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Global => { @@ -7642,11 +7653,20 @@ impl<'de> serde::Deserialize<'de> for TableParquetOptions { } column_specific_options__ = Some(map_.next_value()?); } + GeneratedField::KeyValueMetadata => { + if key_value_metadata__.is_some() { + return Err(serde::de::Error::duplicate_field("keyValueMetadata")); + } + key_value_metadata__ = Some( + map_.next_value::>()? 
+ ); + } } } Ok(TableParquetOptions { global: global__, column_specific_options: column_specific_options__.unwrap_or_default(), + key_value_metadata: key_value_metadata__.unwrap_or_default(), }) } } diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 9bea9be89e1d3..ebc05718a458a 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -670,46 +670,55 @@ pub struct TableParquetOptions { #[prost(message, optional, tag = "1")] pub global: ::core::option::Option, #[prost(message, repeated, tag = "2")] - pub column_specific_options: ::prost::alloc::vec::Vec, + pub column_specific_options: ::prost::alloc::vec::Vec, + #[prost(map = "string, string", tag = "3")] + pub key_value_metadata: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnSpecificOptions { +pub struct ParquetColumnSpecificOptions { #[prost(string, tag = "1")] pub column_name: ::prost::alloc::string::String, #[prost(message, optional, tag = "2")] - pub options: ::core::option::Option, + pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnOptions { - #[prost(oneof = "column_options::BloomFilterEnabledOpt", tags = "1")] +pub struct ParquetColumnOptions { + #[prost(oneof = "parquet_column_options::BloomFilterEnabledOpt", tags = "1")] pub bloom_filter_enabled_opt: ::core::option::Option< - column_options::BloomFilterEnabledOpt, + parquet_column_options::BloomFilterEnabledOpt, >, - #[prost(oneof = "column_options::EncodingOpt", tags = "2")] - pub encoding_opt: ::core::option::Option, - #[prost(oneof = "column_options::DictionaryEnabledOpt", tags = "3")] + #[prost(oneof = "parquet_column_options::EncodingOpt", tags = "2")] + pub encoding_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::DictionaryEnabledOpt", tags = "3")] pub dictionary_enabled_opt: ::core::option::Option< - column_options::DictionaryEnabledOpt, + parquet_column_options::DictionaryEnabledOpt, >, - #[prost(oneof = "column_options::CompressionOpt", tags = "4")] - pub compression_opt: ::core::option::Option, - #[prost(oneof = "column_options::StatisticsEnabledOpt", tags = "5")] + #[prost(oneof = "parquet_column_options::CompressionOpt", tags = "4")] + pub compression_opt: ::core::option::Option, + #[prost(oneof = "parquet_column_options::StatisticsEnabledOpt", tags = "5")] pub statistics_enabled_opt: ::core::option::Option< - column_options::StatisticsEnabledOpt, + parquet_column_options::StatisticsEnabledOpt, + >, + #[prost(oneof = "parquet_column_options::BloomFilterFppOpt", tags = "6")] + pub bloom_filter_fpp_opt: ::core::option::Option< + parquet_column_options::BloomFilterFppOpt, + >, + #[prost(oneof = "parquet_column_options::BloomFilterNdvOpt", tags = "7")] + pub bloom_filter_ndv_opt: ::core::option::Option< + parquet_column_options::BloomFilterNdvOpt, >, - #[prost(oneof = "column_options::BloomFilterFppOpt", tags = "6")] - pub bloom_filter_fpp_opt: ::core::option::Option, - #[prost(oneof = "column_options::BloomFilterNdvOpt", tags = "7")] - pub bloom_filter_ndv_opt: ::core::option::Option, - #[prost(oneof = "column_options::MaxStatisticsSizeOpt", tags = "8")] + #[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")] pub max_statistics_size_opt: 
::core::option::Option< - column_options::MaxStatisticsSizeOpt, + parquet_column_options::MaxStatisticsSizeOpt, >, } -/// Nested message and enum types in `ColumnOptions`. -pub mod column_options { +/// Nested message and enum types in `ParquetColumnOptions`. +pub mod parquet_column_options { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum BloomFilterEnabledOpt { diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index a61a026089fc6..4cf7e73ac9121 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use std::sync::Arc; use crate::protobuf_common as protobuf; @@ -832,42 +833,42 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { } } -impl TryFrom<&ParquetColumnOptions> for protobuf::ColumnOptions { +impl TryFrom<&ParquetColumnOptions> for protobuf::ParquetColumnOptions { type Error = DataFusionError; fn try_from( value: &ParquetColumnOptions, ) -> datafusion_common::Result { - Ok(protobuf::ColumnOptions { + Ok(protobuf::ParquetColumnOptions { compression_opt: value .compression .clone() - .map(protobuf::column_options::CompressionOpt::Compression), + .map(protobuf::parquet_column_options::CompressionOpt::Compression), dictionary_enabled_opt: value .dictionary_enabled - .map(protobuf::column_options::DictionaryEnabledOpt::DictionaryEnabled), + .map(protobuf::parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled), statistics_enabled_opt: value .statistics_enabled .clone() - .map(protobuf::column_options::StatisticsEnabledOpt::StatisticsEnabled), + .map(protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled), max_statistics_size_opt: value.max_statistics_size.map(|v| { - protobuf::column_options::MaxStatisticsSizeOpt::MaxStatisticsSize( + protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize( v as u32, ) }), encoding_opt: value .encoding .clone() - .map(protobuf::column_options::EncodingOpt::Encoding), + .map(protobuf::parquet_column_options::EncodingOpt::Encoding), bloom_filter_enabled_opt: value .bloom_filter_enabled - .map(protobuf::column_options::BloomFilterEnabledOpt::BloomFilterEnabled), + .map(protobuf::parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled), bloom_filter_fpp_opt: value .bloom_filter_fpp - .map(protobuf::column_options::BloomFilterFppOpt::BloomFilterFpp), + .map(protobuf::parquet_column_options::BloomFilterFppOpt::BloomFilterFpp), bloom_filter_ndv_opt: value .bloom_filter_ndv - .map(protobuf::column_options::BloomFilterNdvOpt::BloomFilterNdv), + .map(protobuf::parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv), }) } } @@ -881,15 +882,21 @@ impl TryFrom<&TableParquetOptions> for protobuf::TableParquetOptions { .column_specific_options .iter() .map(|(k, v)| { - Ok(protobuf::ColumnSpecificOptions { + Ok(protobuf::ParquetColumnSpecificOptions { column_name: k.into(), options: Some(v.try_into()?), }) }) .collect::>>()?; + let key_value_metadata = value + .key_value_metadata + .iter() + .filter_map(|(k, v)| v.as_ref().map(|v| (k.clone(), v.clone()))) + .collect::>(); Ok(protobuf::TableParquetOptions { global: Some((&value.global).try_into()?), column_specific_options, + key_value_metadata, }) } } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs 
b/datafusion/proto/src/generated/datafusion_proto_common.rs index f48b05e8d3dc3..ebc05718a458a 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -671,6 +671,11 @@ pub struct TableParquetOptions { pub global: ::core::option::Option, #[prost(message, repeated, tag = "2")] pub column_specific_options: ::prost::alloc::vec::Vec, + #[prost(map = "string, string", tag = "3")] + pub key_value_metadata: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index ce9d24d94d990..607a3d8642fde 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -18,7 +18,10 @@ use std::sync::Arc; use datafusion::{ - config::{CsvOptions, JsonOptions}, + config::{ + CsvOptions, JsonOptions, ParquetColumnOptions, ParquetOptions, + TableParquetOptions, + }, datasource::file_format::{ arrow::ArrowFormatFactory, csv::CsvFormatFactory, json::JsonFormatFactory, parquet::ParquetFormatFactory, FileFormatFactory, @@ -31,7 +34,12 @@ use datafusion_common::{ }; use prost::Message; -use crate::protobuf::{CsvOptions as CsvOptionsProto, JsonOptions as JsonOptionsProto}; +use crate::protobuf::{ + parquet_column_options, parquet_options, CsvOptions as CsvOptionsProto, + JsonOptions as JsonOptionsProto, ParquetColumnOptions as ParquetColumnOptionsProto, + ParquetColumnSpecificOptions, ParquetOptions as ParquetOptionsProto, + TableParquetOptions as TableParquetOptionsProto, +}; use super::LogicalExtensionCodec; @@ -337,6 +345,218 @@ impl LogicalExtensionCodec for JsonLogicalExtensionCodec { } } +impl TableParquetOptionsProto { + fn from_factory(factory: &ParquetFormatFactory) -> Self { + let global_options = if let Some(ref options) = factory.options { + options.clone() + } else { + return TableParquetOptionsProto::default(); + }; + + let column_specific_options = global_options.column_specific_options; + TableParquetOptionsProto { + global: Some(ParquetOptionsProto { + enable_page_index: global_options.global.enable_page_index, + pruning: global_options.global.pruning, + skip_metadata: global_options.global.skip_metadata, + metadata_size_hint_opt: global_options.global.metadata_size_hint.map(|size| { + parquet_options::MetadataSizeHintOpt::MetadataSizeHint(size as u64) + }), + pushdown_filters: global_options.global.pushdown_filters, + reorder_filters: global_options.global.reorder_filters, + data_pagesize_limit: global_options.global.data_pagesize_limit as u64, + write_batch_size: global_options.global.write_batch_size as u64, + writer_version: global_options.global.writer_version.clone(), + compression_opt: global_options.global.compression.map(|compression| { + parquet_options::CompressionOpt::Compression(compression) + }), + dictionary_enabled_opt: global_options.global.dictionary_enabled.map(|enabled| { + parquet_options::DictionaryEnabledOpt::DictionaryEnabled(enabled) + }), + dictionary_page_size_limit: global_options.global.dictionary_page_size_limit as u64, + statistics_enabled_opt: global_options.global.statistics_enabled.map(|enabled| { + parquet_options::StatisticsEnabledOpt::StatisticsEnabled(enabled) + }), + max_statistics_size_opt: global_options.global.max_statistics_size.map(|size| { + 
parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size as u64) + }), + max_row_group_size: global_options.global.max_row_group_size as u64, + created_by: global_options.global.created_by.clone(), + column_index_truncate_length_opt: global_options.global.column_index_truncate_length.map(|length| { + parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(length as u64) + }), + data_page_row_count_limit: global_options.global.data_page_row_count_limit as u64, + encoding_opt: global_options.global.encoding.map(|encoding| { + parquet_options::EncodingOpt::Encoding(encoding) + }), + bloom_filter_on_read: global_options.global.bloom_filter_on_read, + bloom_filter_on_write: global_options.global.bloom_filter_on_write, + bloom_filter_fpp_opt: global_options.global.bloom_filter_fpp.map(|fpp| { + parquet_options::BloomFilterFppOpt::BloomFilterFpp(fpp) + }), + bloom_filter_ndv_opt: global_options.global.bloom_filter_ndv.map(|ndv| { + parquet_options::BloomFilterNdvOpt::BloomFilterNdv(ndv) + }), + allow_single_file_parallelism: global_options.global.allow_single_file_parallelism, + maximum_parallel_row_group_writers: global_options.global.maximum_parallel_row_group_writers as u64, + maximum_buffered_record_batches_per_stream: global_options.global.maximum_buffered_record_batches_per_stream as u64, + schema_force_string_view: global_options.global.schema_force_string_view, + }), + column_specific_options: column_specific_options.into_iter().map(|(column_name, options)| { + ParquetColumnSpecificOptions { + column_name, + options: Some(ParquetColumnOptionsProto { + bloom_filter_enabled_opt: options.bloom_filter_enabled.map(|enabled| { + parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled(enabled) + }), + encoding_opt: options.encoding.map(|encoding| { + parquet_column_options::EncodingOpt::Encoding(encoding) + }), + dictionary_enabled_opt: options.dictionary_enabled.map(|enabled| { + parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled(enabled) + }), + compression_opt: options.compression.map(|compression| { + parquet_column_options::CompressionOpt::Compression(compression) + }), + statistics_enabled_opt: options.statistics_enabled.map(|enabled| { + parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(enabled) + }), + bloom_filter_fpp_opt: options.bloom_filter_fpp.map(|fpp| { + parquet_column_options::BloomFilterFppOpt::BloomFilterFpp(fpp) + }), + bloom_filter_ndv_opt: options.bloom_filter_ndv.map(|ndv| { + parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(ndv) + }), + max_statistics_size_opt: options.max_statistics_size.map(|size| { + parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size as u32) + }), + }) + } + }).collect(), + key_value_metadata: global_options.key_value_metadata + .iter() + .filter_map(|(key, value)| { + value.as_ref().map(|v| (key.clone(), v.clone())) + }) + .collect(), + } + } +} + +impl From<&ParquetOptionsProto> for ParquetOptions { + fn from(proto: &ParquetOptionsProto) -> Self { + ParquetOptions { + enable_page_index: proto.enable_page_index, + pruning: proto.pruning, + skip_metadata: proto.skip_metadata, + metadata_size_hint: proto.metadata_size_hint_opt.as_ref().map(|opt| match opt { + parquet_options::MetadataSizeHintOpt::MetadataSizeHint(size) => *size as usize, + }), + pushdown_filters: proto.pushdown_filters, + reorder_filters: proto.reorder_filters, + data_pagesize_limit: proto.data_pagesize_limit as usize, + write_batch_size: proto.write_batch_size as usize, + writer_version: 
proto.writer_version.clone(), + compression: proto.compression_opt.as_ref().map(|opt| match opt { + parquet_options::CompressionOpt::Compression(compression) => compression.clone(), + }), + dictionary_enabled: proto.dictionary_enabled_opt.as_ref().map(|opt| match opt { + parquet_options::DictionaryEnabledOpt::DictionaryEnabled(enabled) => *enabled, + }), + dictionary_page_size_limit: proto.dictionary_page_size_limit as usize, + statistics_enabled: proto.statistics_enabled_opt.as_ref().map(|opt| match opt { + parquet_options::StatisticsEnabledOpt::StatisticsEnabled(statistics) => statistics.clone(), + }), + max_statistics_size: proto.max_statistics_size_opt.as_ref().map(|opt| match opt { + parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size) => *size as usize, + }), + max_row_group_size: proto.max_row_group_size as usize, + created_by: proto.created_by.clone(), + column_index_truncate_length: proto.column_index_truncate_length_opt.as_ref().map(|opt| match opt { + parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(length) => *length as usize, + }), + data_page_row_count_limit: proto.data_page_row_count_limit as usize, + encoding: proto.encoding_opt.as_ref().map(|opt| match opt { + parquet_options::EncodingOpt::Encoding(encoding) => encoding.clone(), + }), + bloom_filter_on_read: proto.bloom_filter_on_read, + bloom_filter_on_write: proto.bloom_filter_on_write, + bloom_filter_fpp: proto.bloom_filter_fpp_opt.as_ref().map(|opt| match opt { + parquet_options::BloomFilterFppOpt::BloomFilterFpp(fpp) => *fpp, + }), + bloom_filter_ndv: proto.bloom_filter_ndv_opt.as_ref().map(|opt| match opt { + parquet_options::BloomFilterNdvOpt::BloomFilterNdv(ndv) => *ndv, + }), + allow_single_file_parallelism: proto.allow_single_file_parallelism, + maximum_parallel_row_group_writers: proto.maximum_parallel_row_group_writers as usize, + maximum_buffered_record_batches_per_stream: proto.maximum_buffered_record_batches_per_stream as usize, + schema_force_string_view: proto.schema_force_string_view, + } + } +} + +impl From for ParquetColumnOptions { + fn from(proto: ParquetColumnOptionsProto) -> Self { + ParquetColumnOptions { + bloom_filter_enabled: proto.bloom_filter_enabled_opt.map( + |parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v)| v, + ), + encoding: proto + .encoding_opt + .map(|parquet_column_options::EncodingOpt::Encoding(v)| v), + dictionary_enabled: proto.dictionary_enabled_opt.map( + |parquet_column_options::DictionaryEnabledOpt::DictionaryEnabled(v)| v, + ), + compression: proto + .compression_opt + .map(|parquet_column_options::CompressionOpt::Compression(v)| v), + statistics_enabled: proto.statistics_enabled_opt.map( + |parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v)| v, + ), + bloom_filter_fpp: proto + .bloom_filter_fpp_opt + .map(|parquet_column_options::BloomFilterFppOpt::BloomFilterFpp(v)| v), + bloom_filter_ndv: proto + .bloom_filter_ndv_opt + .map(|parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(v)| v), + max_statistics_size: proto.max_statistics_size_opt.map( + |parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v)| { + v as usize + }, + ), + } + } +} + +impl From<&TableParquetOptionsProto> for TableParquetOptions { + fn from(proto: &TableParquetOptionsProto) -> Self { + TableParquetOptions { + global: proto + .global + .as_ref() + .map(ParquetOptions::from) + .unwrap_or_default(), + column_specific_options: proto + .column_specific_options + .iter() + .map(|parquet_column_options| { + ( + 
parquet_column_options.column_name.clone(), + ParquetColumnOptions::from( + parquet_column_options.options.clone().unwrap_or_default(), + ), + ) + }) + .collect(), + key_value_metadata: proto + .key_value_metadata + .iter() + .map(|(k, v)| (k.clone(), Some(v.clone()))) + .collect(), + } + } +} + #[derive(Debug)] pub struct ParquetLogicalExtensionCodec; @@ -382,17 +602,47 @@ impl LogicalExtensionCodec for ParquetLogicalExtensionCodec { fn try_decode_file_format( &self, - __buf: &[u8], - __ctx: &SessionContext, + buf: &[u8], + _ctx: &SessionContext, ) -> datafusion_common::Result> { - Ok(Arc::new(ParquetFormatFactory::new())) + let proto = TableParquetOptionsProto::decode(buf).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to decode TableParquetOptionsProto: {:?}", + e + )) + })?; + let options: TableParquetOptions = (&proto).into(); + Ok(Arc::new(ParquetFormatFactory { + options: Some(options), + })) } fn try_encode_file_format( &self, - __buf: &mut Vec, - __node: Arc, + buf: &mut Vec, + node: Arc, ) -> datafusion_common::Result<()> { + let options = if let Some(parquet_factory) = + node.as_any().downcast_ref::() + { + parquet_factory.options.clone().unwrap_or_default() + } else { + return Err(DataFusionError::Execution( + "Unsupported FileFormatFactory type".to_string(), + )); + }; + + let proto = TableParquetOptionsProto::from_factory(&ParquetFormatFactory { + options: Some(options), + }); + + proto.encode(buf).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to encode TableParquetOptionsProto: {:?}", + e + )) + })?; + Ok(()) } } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index daa92475068f4..f7ad2b9b6158b 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -577,6 +577,74 @@ async fn roundtrip_logical_plan_copy_to_json() -> Result<()> { Ok(()) } +#[tokio::test] +async fn roundtrip_logical_plan_copy_to_parquet() -> Result<()> { + let ctx = SessionContext::new(); + + // Assume create_parquet_scan creates a logical plan for scanning a Parquet file + let input = create_parquet_scan(&ctx).await?; + + let table_options = + TableOptions::default_from_session_config(ctx.state().config_options()); + let mut parquet_format = table_options.parquet; + + // Set specific Parquet format options + let mut key_value_metadata = HashMap::new(); + key_value_metadata.insert("test".to_string(), Some("test".to_string())); + parquet_format.key_value_metadata = key_value_metadata.clone(); + + parquet_format.global.allow_single_file_parallelism = false; + parquet_format.global.created_by = "test".to_string(); + + let file_type = format_as_file_type(Arc::new( + ParquetFormatFactory::new_with_options(parquet_format.clone()), + )); + + let plan = LogicalPlan::Copy(CopyTo { + input: Arc::new(input), + output_url: "test.parquet".to_string(), + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + file_type, + options: Default::default(), + }); + + // Assume ParquetLogicalExtensionCodec is implemented similarly to JsonLogicalExtensionCodec + let codec = ParquetLogicalExtensionCodec {}; + let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; + let logical_round_trip = + logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; + assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + + match logical_round_trip { + LogicalPlan::Copy(copy_to) => { + assert_eq!("test.parquet", 
copy_to.output_url); + assert_eq!("parquet".to_string(), copy_to.file_type.get_ext()); + assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); + + let file_type = copy_to + .file_type + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + + let format_factory = file_type.as_format_factory(); + let parquet_factory = format_factory + .as_ref() + .as_any() + .downcast_ref::() + .unwrap(); + let parquet_config = parquet_factory.options.as_ref().unwrap(); + assert_eq!(parquet_config.key_value_metadata, key_value_metadata); + assert!(!parquet_config.global.allow_single_file_parallelism); + assert_eq!(parquet_config.global.created_by, "test".to_string()); + } + _ => panic!(), + } + + Ok(()) +} + async fn create_csv_scan(ctx: &SessionContext) -> Result { ctx.register_csv("t1", "tests/testdata/test.csv", CsvReadOptions::default()) .await?; @@ -597,6 +665,20 @@ async fn create_json_scan(ctx: &SessionContext) -> Result Result { + ctx.register_parquet( + "t1", + "../substrait/tests/testdata/empty.parquet", + ParquetReadOptions::default(), + ) + .await?; + + let input = ctx.table("t1").await?.into_optimized_plan()?; + Ok(input) +} + #[tokio::test] async fn roundtrip_logical_plan_distinct_on() -> Result<()> { let ctx = SessionContext::new(); From ae2ca6a0e21b77bba1ac40ea6ee059e47d0791e0 Mon Sep 17 00:00:00 2001 From: Jeffrey Smith II Date: Wed, 31 Jul 2024 17:49:39 -0400 Subject: [PATCH 182/357] Support cross-timezone `timestamp` comparison via coercsion (#11711) * feat: enable comparisons across timezones * test: add tests for timezone changes * test: fix test * chore: cargofmt * chore: improve documentation and code cleanup * feat: use nonstrict timezone coercion for values --- datafusion/expr/src/type_coercion/binary.rs | 165 +++++++++++++----- .../sqllogictest/test_files/timestamps.slt | 96 ++++++++++ 2 files changed, 222 insertions(+), 39 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index a657f4df0e3d5..17280289ed1b4 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -28,7 +28,6 @@ use arrow::datatypes::{ DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; - use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result}; /// The type signature of an instantiation of binary operator expression such as @@ -155,7 +154,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result rhs: rhs.clone(), ret, }) - } else if let Some(coerced) = temporal_coercion(lhs, rhs) { + } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) { // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp let ret = get_result(&coerced, &coerced).map_err(|e| { @@ -492,7 +491,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option bool { ) } +/// Non-strict Timezone Coercion is useful in scenarios where we can guarantee +/// a stable relationship between two timestamps of different timezones. +/// +/// An example of this is binary comparisons (<, >, ==, etc). Arrow stores timestamps +/// as relative to UTC epoch, and then adds the timezone as an offset. As a result, we can always +/// do a binary comparison between the two times. 
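For illustration, a standalone snippet (assuming the chrono crate; not part of this patch) of why that comparison is stable: two renderings of the same instant with different offsets reduce to the same epoch value, so equality and ordering do not depend on the display zone.

    // Illustration only: "2024-02-01T00:00:01+01:00" and "2024-01-31T23:00:01Z"
    // name the same instant, so they carry the same seconds-since-epoch value.
    use chrono::DateTime;

    fn main() {
        let brussels = DateTime::parse_from_rfc3339("2024-02-01T00:00:01+01:00").unwrap();
        let utc = DateTime::parse_from_rfc3339("2024-01-31T23:00:01Z").unwrap();

        assert_eq!(brussels.timestamp(), utc.timestamp());
        assert!(brussels == utc); // chrono compares the instant, not the offset
    }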
+/// +/// Timezone coercion is handled by the following rules: +/// - If only one has a timezone, coerce the other to match +/// - If both have a timezone, coerce to the left type +/// - "UTC" and "+00:00" are considered equivalent +fn temporal_coercion_nonstrict_timezone( + lhs_type: &DataType, + rhs_type: &DataType, +) -> Option { + use arrow::datatypes::DataType::*; + + match (lhs_type, rhs_type) { + (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { + let tz = match (lhs_tz, rhs_tz) { + // If both have a timezone, use the left timezone. + (Some(lhs_tz), Some(_rhs_tz)) => Some(Arc::clone(lhs_tz)), + (Some(lhs_tz), None) => Some(Arc::clone(lhs_tz)), + (None, Some(rhs_tz)) => Some(Arc::clone(rhs_tz)), + (None, None) => None, + }; + + let unit = timeunit_coercion(lhs_unit, rhs_unit); + + Some(Timestamp(unit, tz)) + } + _ => temporal_coercion(lhs_type, rhs_type), + } +} + +/// Strict Timezone coercion is useful in scenarios where we cannot guarantee a stable relationship +/// between two timestamps with different timezones or do not want implicit coercion between them. +/// +/// An example of this when attempting to coerce function arguments. Functions already have a mechanism +/// for defining which timestamp types they want to support, so we do not want to do any further coercion. +/// /// Coercion rules for Temporal columns: the type that both lhs and rhs can be /// casted to for the purpose of a date computation /// For interval arithmetic, it doesn't handle datetime type +/- interval -fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { +/// Timezone coercion is handled by the following rules: +/// - If only one has a timezone, coerce the other to match +/// - If both have a timezone, throw an error +/// - "UTC" and "+00:00" are considered equivalent +fn temporal_coercion_strict_timezone( + lhs_type: &DataType, + rhs_type: &DataType, +) -> Option { use arrow::datatypes::DataType::*; - use arrow::datatypes::IntervalUnit::*; - use arrow::datatypes::TimeUnit::*; match (lhs_type, rhs_type) { - (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), - (Date64, Date32) | (Date32, Date64) => Some(Date64), - (Timestamp(_, None), Date64) | (Date64, Timestamp(_, None)) => { - Some(Timestamp(Nanosecond, None)) - } - (Timestamp(_, _tz), Date64) | (Date64, Timestamp(_, _tz)) => { - Some(Timestamp(Nanosecond, None)) - } - (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => { - Some(Timestamp(Nanosecond, None)) - } - (Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => { - Some(Timestamp(Nanosecond, None)) - } (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { let tz = match (lhs_tz, rhs_tz) { (Some(lhs_tz), Some(rhs_tz)) => { @@ -1078,31 +1109,60 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option None, }; - let unit = match (lhs_unit, rhs_unit) { - (Second, Millisecond) => Second, - (Second, Microsecond) => Second, - (Second, Nanosecond) => Second, - (Millisecond, Second) => Second, - (Millisecond, Microsecond) => Millisecond, - (Millisecond, Nanosecond) => Millisecond, - (Microsecond, Second) => Second, - (Microsecond, Millisecond) => Millisecond, - (Microsecond, Nanosecond) => Microsecond, - (Nanosecond, Second) => Second, - (Nanosecond, Millisecond) => Millisecond, - (Nanosecond, Microsecond) => Microsecond, - (l, r) => { - assert_eq!(l, r); - *l - } - }; + let unit = timeunit_coercion(lhs_unit, rhs_unit); Some(Timestamp(unit, tz)) } + _ => temporal_coercion(lhs_type, rhs_type), + } +} + +fn 
temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { + use arrow::datatypes::DataType::*; + use arrow::datatypes::IntervalUnit::*; + use arrow::datatypes::TimeUnit::*; + + match (lhs_type, rhs_type) { + (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), + (Date64, Date32) | (Date32, Date64) => Some(Date64), + (Timestamp(_, None), Date64) | (Date64, Timestamp(_, None)) => { + Some(Timestamp(Nanosecond, None)) + } + (Timestamp(_, _tz), Date64) | (Date64, Timestamp(_, _tz)) => { + Some(Timestamp(Nanosecond, None)) + } + (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => { + Some(Timestamp(Nanosecond, None)) + } + (Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => { + Some(Timestamp(Nanosecond, None)) + } _ => None, } } +fn timeunit_coercion(lhs_unit: &TimeUnit, rhs_unit: &TimeUnit) -> TimeUnit { + use arrow::datatypes::TimeUnit::*; + match (lhs_unit, rhs_unit) { + (Second, Millisecond) => Second, + (Second, Microsecond) => Second, + (Second, Nanosecond) => Second, + (Millisecond, Second) => Second, + (Millisecond, Microsecond) => Millisecond, + (Millisecond, Nanosecond) => Millisecond, + (Microsecond, Second) => Second, + (Microsecond, Millisecond) => Millisecond, + (Microsecond, Nanosecond) => Microsecond, + (Nanosecond, Second) => Second, + (Nanosecond, Millisecond) => Millisecond, + (Nanosecond, Microsecond) => Microsecond, + (l, r) => { + assert_eq!(l, r); + *l + } + } +} + /// coercion rules from NULL type. Since NULL can be casted to any other type in arrow, /// either lhs or rhs is NULL, if NULL can be casted to type of the other side, the coercion is valid. fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { @@ -1727,6 +1787,33 @@ mod tests { DataType::LargeBinary ); + // Timestamps + let utc: Option> = Some("UTC".into()); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, utc.clone()), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, utc.clone()) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, utc.clone()) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())), + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + DataType::Timestamp(TimeUnit::Second, utc.clone()), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())) + ); + // TODO add other data type Ok(()) } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 2ca2d49997a66..b63aad49d152e 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3021,3 +3021,99 @@ drop view t_utc; statement ok drop view t_timezone; + +# test comparisons across timestamps +statement ok +create table t AS +VALUES + ('2024-01-01T00:00:01Z'), + ('2024-02-01T00:00:01Z'), + ('2024-03-01T00:00:01Z') +; + +statement ok +create view t_utc as +select column1::timestamp AT TIME ZONE 'UTC' as "column1" +from t; + +statement ok +create view t_europe as +select column1::timestamp AT TIME ZONE 'Europe/Brussels' as 
"column1" +from t; + +query P +SELECT column1 FROM t_utc WHERE column1 < '2024-02-01T00:00:00' AT TIME ZONE 'America/Los_Angeles'; +---- +2024-01-01T00:00:01Z +2024-02-01T00:00:01Z + +query P +SELECT column1 FROM t_europe WHERE column1 = '2024-01-31T16:00:01' AT TIME ZONE 'America/Los_Angeles'; +---- +2024-02-01T00:00:01+01:00 + +query P +SELECT column1 FROM t_europe WHERE column1 BETWEEN '2020-01-01T00:00:00' AT TIME ZONE 'Australia/Brisbane' AND '2024-02-01T00:00:00' AT TIME ZONE 'America/Los_Angeles'; +---- +2024-01-01T00:00:01+01:00 +2024-02-01T00:00:01+01:00 + +query P +SELECT column1 FROM t_utc WHERE column1 IN ('2024-01-31T16:00:01' AT TIME ZONE 'America/Los_Angeles'); +---- +2024-02-01T00:00:01Z + +query P +SELECT column1 as u from t_utc UNION SELECT column1 from t_europe ORDER BY u; +---- +2023-12-31T23:00:01Z +2024-01-01T00:00:01Z +2024-01-31T23:00:01Z +2024-02-01T00:00:01Z +2024-02-29T23:00:01Z +2024-03-01T00:00:01Z + +query P +SELECT column1 as e from t_europe UNION SELECT column1 from t_utc ORDER BY e; +---- +2024-01-01T00:00:01+01:00 +2024-01-01T01:00:01+01:00 +2024-02-01T00:00:01+01:00 +2024-02-01T01:00:01+01:00 +2024-03-01T00:00:01+01:00 +2024-03-01T01:00:01+01:00 + +query P +SELECT nvl2(null, '2020-01-01T00:00:00-04:00'::timestamp, '2021-02-03T04:05:06Z'::timestamp) +---- +2021-02-03T04:05:06 + +query ? +SELECT make_array('2020-01-01T00:00:00-04:00'::timestamp, '2021-01-01T01:02:03Z'::timestamp); +---- +[2020-01-01T04:00:00, 2021-01-01T01:02:03] + +query P +SELECT * FROM VALUES + ('2023-12-31T23:00:00Z' AT TIME ZONE 'UTC'), + ('2024-02-01T00:00:00' AT TIME ZONE 'America/Los_Angeles'); +---- +2023-12-31T15:00:00-08:00 +2024-02-01T00:00:00-08:00 + +query P +SELECT * FROM VALUES + ('2024-02-01T00:00:00' AT TIME ZONE 'America/Los_Angeles'), + ('2023-12-31T23:00:00' AT TIME ZONE 'UTC'); +---- +2024-02-01T08:00:00Z +2023-12-31T23:00:00Z + +statement ok +drop table t; + +statement ok +drop view t_utc; + +statement ok +drop view t_europe; From 0f554fa12490ffa72c3c6ca42186c1ac9461bfa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 1 Aug 2024 11:06:55 +0800 Subject: [PATCH 183/357] Minor: Improve documentation for AggregateUDFImpl::state_fields (#11740) * Minor: Improve documentation for AggregateUDFImpl::state_fields * apply review suggestion --- datafusion/expr/src/udaf.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index af964b615445f..f5eeef6b53bbe 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -351,12 +351,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Return the fields used to store the intermediate state of this accumulator. /// - /// # Arguments: - /// 1. `name`: the name of the expression (e.g. AVG, SUM, etc) - /// 2. `value_type`: Aggregate function output returned by [`Self::return_type`] if defined, otherwise - /// it is equivalent to the data type of the first arguments - /// 3. `ordering_fields`: the fields used to order the input arguments, if any. - /// Empty if no ordering expression is provided. + /// args: [`StateFieldsArgs`] contains arguments passed to the + /// aggregate function's accumulator. 
/// /// # Notes: /// From 9dd2cfc8013ba72695e82bb0367e816fe3cd77b3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 06:35:28 -0400 Subject: [PATCH 184/357] Do not push down Sorts if it violates the sort requirements (#11678) * Do not push down Sorts if it violates the sort requirements * Test for pushing through orders --- .../src/physical_optimizer/enforce_sorting.rs | 64 ++++++++++++ .../src/physical_optimizer/sort_pushdown.rs | 24 +++++ .../core/src/physical_optimizer/test_utils.rs | 99 ++++++++++++++++++- 3 files changed, 186 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index cf9d33252ad9d..faf8d01a97fd9 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -621,6 +621,7 @@ mod tests { limit_exec, local_limit_exec, memory_exec, parquet_exec, parquet_exec_sorted, repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, spr_repartition_exec, union_exec, + RequirementsTestExec, }; use crate::physical_plan::{displayable, get_plan_string, Partitioning}; use crate::prelude::{SessionConfig, SessionContext}; @@ -2346,4 +2347,67 @@ mod tests { assert_optimized!(expected_input, expected_no_change, physical_plan, true); Ok(()) } + + #[tokio::test] + async fn test_push_with_required_input_ordering_prohibited() -> Result<()> { + // SortExec: expr=[b] <-- can't push this down + // RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order + // SortExec: expr=[a] + // MemoryExec + let schema = create_test_schema3()?; + let sort_exprs_a = vec![sort_expr("a", &schema)]; + let sort_exprs_b = vec![sort_expr("b", &schema)]; + let plan = memory_exec(&schema); + let plan = sort_exec(sort_exprs_a.clone(), plan); + let plan = RequirementsTestExec::new(plan) + .with_required_input_ordering(sort_exprs_a) + .with_maintains_input_order(true) + .into_arc(); + let plan = sort_exec(sort_exprs_b, plan); + + let expected_input = [ + "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", + " RequiredInputOrderingExec", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + // should not be able to push shorts + let expected_no_change = expected_input; + assert_optimized!(expected_input, expected_no_change, plan, true); + Ok(()) + } + + // test when the required input ordering is satisfied so could push through + #[tokio::test] + async fn test_push_with_required_input_ordering_allowed() -> Result<()> { + // SortExec: expr=[a,b] <-- can push this down (as it is compatible with the required input ordering) + // RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order + // SortExec: expr=[a] + // MemoryExec + let schema = create_test_schema3()?; + let sort_exprs_a = vec![sort_expr("a", &schema)]; + let sort_exprs_ab = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let plan = memory_exec(&schema); + let plan = sort_exec(sort_exprs_a.clone(), plan); + let plan = RequirementsTestExec::new(plan) + .with_required_input_ordering(sort_exprs_a) + .with_maintains_input_order(true) + .into_arc(); + let plan = sort_exec(sort_exprs_ab, plan); + + let expected_input = [ + "SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " RequiredInputOrderingExec", + " SortExec: expr=[a@0 ASC], 
preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + // should able to push shorts + let expected = [ + "RequiredInputOrderingExec", + " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected, plan, true); + Ok(()) + } } diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 36ac4b22d5942..3577e109b0697 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -176,6 +176,7 @@ fn pushdown_requirement_to_children( || plan.as_any().is::() || is_limit(plan) || plan.as_any().is::() + || pushdown_would_violate_requirements(parent_required, plan.as_ref()) { // If the current plan is a leaf node or can not maintain any of the input ordering, can not pushed down requirements. // For RepartitionExec, we always choose to not push down the sort requirements even the RepartitionExec(input_partition=1) could maintain input ordering. @@ -211,6 +212,29 @@ fn pushdown_requirement_to_children( // TODO: Add support for Projection push down } +/// Return true if pushing the sort requirements through a node would violate +/// the input sorting requirements for the plan +fn pushdown_would_violate_requirements( + parent_required: LexRequirementRef, + child: &dyn ExecutionPlan, +) -> bool { + child + .required_input_ordering() + .iter() + .any(|child_required| { + let Some(child_required) = child_required.as_ref() else { + // no requirements, so pushing down would not violate anything + return false; + }; + // check if the plan's requirements would still e satisfied if we pushed + // down the parent requirements + child_required + .iter() + .zip(parent_required.iter()) + .all(|(c, p)| !c.compatible(p)) + }) +} + /// Determine children requirements: /// - If children requirements are more specific, do not push down parent /// requirements. diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 5320938d2eb88..55a0fa8145527 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -17,6 +17,8 @@ //! Collection of testing utility functions that are leveraged by the query optimizer rules +use std::any::Any; +use std::fmt::Formatter; use std::sync::Arc; use crate::datasource::listing::PartitionedFile; @@ -47,10 +49,14 @@ use datafusion_expr::{WindowFrame, WindowFunctionDefinition}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; -use datafusion_physical_plan::displayable; use datafusion_physical_plan::tree_node::PlanContext; +use datafusion_physical_plan::{ + displayable, DisplayAs, DisplayFormatType, PlanProperties, +}; use async_trait::async_trait; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; async fn register_current_csv( ctx: &SessionContext, @@ -354,6 +360,97 @@ pub fn sort_exec( Arc::new(SortExec::new(sort_exprs, input)) } +/// A test [`ExecutionPlan`] whose requirements can be configured. 
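A toy, self-contained model of the rule this guard encodes (the types are illustrative stand-ins, not DataFusion's PhysicalSortRequirement): a parent sort is pushed below a node only if the node's own required input ordering stays satisfied, which matches the two new tests above.

    // Requirements reduced to a bare column name; "compatible" means same column.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct SortReq(&'static str);

    impl SortReq {
        fn compatible(&self, other: &SortReq) -> bool {
            self.0 == other.0
        }
    }

    // Mirrors the shape of `pushdown_would_violate_requirements`: reject the
    // pushdown when none of the leading parent requirements line up with what
    // the child itself requires.
    fn pushdown_would_violate(parent: &[SortReq], child_required: &[SortReq]) -> bool {
        !child_required.is_empty() // no requirement, nothing to violate
            && child_required
                .iter()
                .zip(parent.iter())
                .all(|(c, p)| !c.compatible(p))
    }

    fn main() {
        // Parent wants ORDER BY b, child requires input ordered by a: blocked.
        assert!(pushdown_would_violate(&[SortReq("b")], &[SortReq("a")]));
        // Parent wants ORDER BY a, b and child requires a: prefix matches, allowed.
        assert!(!pushdown_would_violate(
            &[SortReq("a"), SortReq("b")],
            &[SortReq("a")]
        ));
    }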
+#[derive(Debug)] +pub struct RequirementsTestExec { + required_input_ordering: Vec, + maintains_input_order: bool, + input: Arc, +} + +impl RequirementsTestExec { + pub fn new(input: Arc) -> Self { + Self { + required_input_ordering: vec![], + maintains_input_order: true, + input, + } + } + + /// sets the required input ordering + pub fn with_required_input_ordering( + mut self, + required_input_ordering: Vec, + ) -> Self { + self.required_input_ordering = required_input_ordering; + self + } + + /// set the maintains_input_order flag + pub fn with_maintains_input_order(mut self, maintains_input_order: bool) -> Self { + self.maintains_input_order = maintains_input_order; + self + } + + /// returns this ExecutionPlan as an Arc + pub fn into_arc(self) -> Arc { + Arc::new(self) + } +} + +impl DisplayAs for RequirementsTestExec { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + write!(f, "RequiredInputOrderingExec") + } +} + +impl ExecutionPlan for RequirementsTestExec { + fn name(&self) -> &str { + "RequiredInputOrderingExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + self.input.properties() + } + + fn required_input_ordering(&self) -> Vec>> { + let requirement = + PhysicalSortRequirement::from_sort_exprs(&self.required_input_ordering); + vec![Some(requirement)] + } + + fn maintains_input_order(&self) -> Vec { + vec![self.maintains_input_order] + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + assert_eq!(children.len(), 1); + Ok(RequirementsTestExec::new(children[0].clone()) + .with_required_input_ordering(self.required_input_ordering.clone()) + .with_maintains_input_order(self.maintains_input_order) + .into_arc()) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!("Test exec does not support execution") + } +} + /// A [`PlanContext`] object is susceptible to being left in an inconsistent state after /// untested mutable operations. 
It is crucial that there be no discrepancies between a plan /// associated with the root node and the plan generated after traversing all nodes From 4884c08ceb83037ea72f56526bd1d3d2c00def4c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 07:27:36 -0400 Subject: [PATCH 185/357] Use upstream StatisticsConveter (#11479) --- datafusion-examples/examples/parquet_index.rs | 6 +- datafusion/core/Cargo.toml | 4 - datafusion/core/benches/parquet_statistic.rs | 287 -- .../src/datasource/file_format/parquet.rs | 5 +- .../datasource/physical_plan/parquet/mod.rs | 2 - .../physical_plan/parquet/page_filter.rs | 8 +- .../physical_plan/parquet/row_group_filter.rs | 24 +- .../physical_plan/parquet/statistics.rs | 2642 ----------------- .../core/tests/parquet/arrow_statistics.rs | 2178 -------------- datafusion/core/tests/parquet/mod.rs | 429 +-- 10 files changed, 31 insertions(+), 5554 deletions(-) delete mode 100644 datafusion/core/benches/parquet_statistic.rs delete mode 100644 datafusion/core/src/datasource/physical_plan/parquet/statistics.rs delete mode 100644 datafusion/core/tests/parquet/arrow_statistics.rs diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index 91e178f1f1a53..d6e17764442d5 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -25,12 +25,10 @@ use arrow_schema::SchemaRef; use async_trait::async_trait; use datafusion::catalog::Session; use datafusion::datasource::listing::PartitionedFile; -use datafusion::datasource::physical_plan::{ - parquet::StatisticsConverter, - {FileScanConfig, ParquetExec}, -}; +use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; use datafusion::datasource::TableProvider; use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::parquet::arrow::arrow_reader::statistics::StatisticsConverter; use datafusion::parquet::arrow::{ arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter, }; diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 09b90a56d2aaf..0415c3164b38c 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -217,10 +217,6 @@ name = "sort" harness = false name = "topk_aggregate" -[[bench]] -harness = false -name = "parquet_statistic" - [[bench]] harness = false name = "map_query_sql" diff --git a/datafusion/core/benches/parquet_statistic.rs b/datafusion/core/benches/parquet_statistic.rs deleted file mode 100644 index 3595e8773b070..0000000000000 --- a/datafusion/core/benches/parquet_statistic.rs +++ /dev/null @@ -1,287 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Benchmarks of benchmark for extracting arrow statistics from parquet - -use arrow::array::{ArrayRef, DictionaryArray, Float64Array, StringArray, UInt64Array}; -use arrow_array::{Int32Array, Int64Array, RecordBatch}; -use arrow_schema::{ - DataType::{self, *}, - Field, Schema, -}; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use datafusion::datasource::physical_plan::parquet::StatisticsConverter; -use parquet::{ - arrow::arrow_reader::ArrowReaderOptions, file::properties::WriterProperties, -}; -use parquet::{ - arrow::{arrow_reader::ArrowReaderBuilder, ArrowWriter}, - file::properties::EnabledStatistics, -}; -use std::sync::Arc; -use tempfile::NamedTempFile; -#[derive(Debug, Clone)] -enum TestTypes { - UInt64, - Int64, - F64, - String, - Dictionary, -} - -use std::fmt; - -impl fmt::Display for TestTypes { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TestTypes::UInt64 => write!(f, "UInt64"), - TestTypes::Int64 => write!(f, "Int64"), - TestTypes::F64 => write!(f, "F64"), - TestTypes::String => write!(f, "String"), - TestTypes::Dictionary => write!(f, "Dictionary(Int32, String)"), - } - } -} - -fn create_parquet_file( - dtype: TestTypes, - row_groups: usize, - data_page_row_count_limit: &Option, -) -> NamedTempFile { - let schema = match dtype { - TestTypes::UInt64 => { - Arc::new(Schema::new(vec![Field::new("col", DataType::UInt64, true)])) - } - TestTypes::Int64 => { - Arc::new(Schema::new(vec![Field::new("col", DataType::Int64, true)])) - } - TestTypes::F64 => Arc::new(Schema::new(vec![Field::new( - "col", - DataType::Float64, - true, - )])), - TestTypes::String => { - Arc::new(Schema::new(vec![Field::new("col", DataType::Utf8, true)])) - } - TestTypes::Dictionary => Arc::new(Schema::new(vec![Field::new( - "col", - DataType::Dictionary(Box::new(Int32), Box::new(Utf8)), - true, - )])), - }; - - let mut props = WriterProperties::builder().set_max_row_group_size(row_groups); - if let Some(limit) = data_page_row_count_limit { - props = props - .set_data_page_row_count_limit(*limit) - .set_statistics_enabled(EnabledStatistics::Page); - }; - let props = props.build(); - - let file = tempfile::Builder::new() - .suffix(".parquet") - .tempfile() - .unwrap(); - let mut writer = - ArrowWriter::try_new(file.reopen().unwrap(), schema.clone(), Some(props)) - .unwrap(); - - for _ in 0..row_groups { - let batch = match dtype { - TestTypes::UInt64 => make_uint64_batch(), - TestTypes::Int64 => make_int64_batch(), - TestTypes::F64 => make_f64_batch(), - TestTypes::String => make_string_batch(), - TestTypes::Dictionary => make_dict_batch(), - }; - if data_page_row_count_limit.is_some() { - // Send batches one at a time. This allows the - // writer to apply the page limit, that is only - // checked on RecordBatch boundaries. 
- for i in 0..batch.num_rows() { - writer.write(&batch.slice(i, 1)).unwrap(); - } - } else { - writer.write(&batch).unwrap(); - } - } - writer.close().unwrap(); - file -} - -fn make_uint64_batch() -> RecordBatch { - let array: ArrayRef = Arc::new(UInt64Array::from(vec![ - Some(1), - Some(2), - Some(3), - Some(4), - Some(5), - ])); - RecordBatch::try_new( - Arc::new(arrow::datatypes::Schema::new(vec![ - arrow::datatypes::Field::new("col", UInt64, false), - ])), - vec![array], - ) - .unwrap() -} - -fn make_int64_batch() -> RecordBatch { - let array: ArrayRef = Arc::new(Int64Array::from(vec![ - Some(1), - Some(2), - Some(3), - Some(4), - Some(5), - ])); - RecordBatch::try_new( - Arc::new(arrow::datatypes::Schema::new(vec![ - arrow::datatypes::Field::new("col", Int64, false), - ])), - vec![array], - ) - .unwrap() -} - -fn make_f64_batch() -> RecordBatch { - let array: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0, 5.0])); - RecordBatch::try_new( - Arc::new(arrow::datatypes::Schema::new(vec![ - arrow::datatypes::Field::new("col", Float64, false), - ])), - vec![array], - ) - .unwrap() -} - -fn make_string_batch() -> RecordBatch { - let array: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); - RecordBatch::try_new( - Arc::new(arrow::datatypes::Schema::new(vec![ - arrow::datatypes::Field::new("col", Utf8, false), - ])), - vec![array], - ) - .unwrap() -} - -fn make_dict_batch() -> RecordBatch { - let keys = Int32Array::from(vec![0, 1, 2, 3, 4]); - let values = StringArray::from(vec!["a", "b", "c", "d", "e"]); - let array: ArrayRef = - Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap()); - RecordBatch::try_new( - Arc::new(Schema::new(vec![Field::new( - "col", - Dictionary(Box::new(Int32), Box::new(Utf8)), - false, - )])), - vec![array], - ) - .unwrap() -} - -fn criterion_benchmark(c: &mut Criterion) { - let row_groups = 100; - use TestTypes::*; - let types = vec![Int64, UInt64, F64, String, Dictionary]; - let data_page_row_count_limits = vec![None, Some(1)]; - - for dtype in types { - for data_page_row_count_limit in &data_page_row_count_limits { - let file = - create_parquet_file(dtype.clone(), row_groups, data_page_row_count_limit); - let file = file.reopen().unwrap(); - let options = ArrowReaderOptions::new().with_page_index(true); - let reader = ArrowReaderBuilder::try_new_with_options(file, options).unwrap(); - let metadata = reader.metadata(); - let row_groups = metadata.row_groups(); - let row_group_indices: Vec<_> = (0..row_groups.len()).collect(); - - let statistic_type = if data_page_row_count_limit.is_some() { - "data page" - } else { - "row group" - }; - - let mut group = c.benchmark_group(format!( - "Extract {} statistics for {}", - statistic_type, - dtype.clone() - )); - group.bench_function( - BenchmarkId::new("extract_statistics", dtype.clone()), - |b| { - b.iter(|| { - let converter = StatisticsConverter::try_new( - "col", - reader.schema(), - reader.parquet_schema(), - ) - .unwrap(); - - if data_page_row_count_limit.is_some() { - let column_page_index = reader - .metadata() - .column_index() - .expect("File should have column page indices"); - - let column_offset_index = reader - .metadata() - .offset_index() - .expect("File should have column offset indices"); - - let _ = converter.data_page_mins( - column_page_index, - column_offset_index, - &row_group_indices, - ); - let _ = converter.data_page_maxes( - column_page_index, - column_offset_index, - &row_group_indices, - ); - let _ = converter.data_page_null_counts( - 
column_page_index, - column_offset_index, - &row_group_indices, - ); - let _ = converter.data_page_row_counts( - column_offset_index, - row_groups, - &row_group_indices, - ); - } else { - let _ = converter.row_group_mins(row_groups.iter()).unwrap(); - let _ = converter.row_group_maxes(row_groups.iter()).unwrap(); - let _ = converter - .row_group_null_counts(row_groups.iter()) - .unwrap(); - let _ = converter - .row_group_row_counts(row_groups.iter()) - .unwrap(); - } - }) - }, - ); - group.finish(); - } - } -} - -criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 25956665d56c4..829b69c297ee5 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -75,12 +75,11 @@ use tokio::io::{AsyncWrite, AsyncWriteExt}; use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::task::JoinSet; -use crate::datasource::physical_plan::parquet::{ - ParquetExecBuilder, StatisticsConverter, -}; +use crate::datasource::physical_plan::parquet::ParquetExecBuilder; use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; +use parquet::arrow::arrow_reader::statistics::StatisticsConverter; /// Initial writing buffer size. Note this is just a size hint for efficiency. It /// will grow beyond the set value if needed. diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index ed71d871b3fd3..91e2f8866bffb 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -52,7 +52,6 @@ mod page_filter; mod reader; mod row_filter; mod row_group_filter; -mod statistics; mod writer; use crate::datasource::schema_adapter::{ @@ -62,7 +61,6 @@ pub use access_plan::{ParquetAccessPlan, RowGroupAccess}; pub use metrics::ParquetFileMetrics; use opener::ParquetOpener; pub use reader::{DefaultParquetFileReaderFactory, ParquetFileReaderFactory}; -pub use statistics::StatisticsConverter; pub use writer::plan_to_parquet; /// Execution plan for reading one or more Parquet files. diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index d658608ab4f1b..e4d26a460ecdf 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -17,8 +17,8 @@ //! 
Contains code to filter entire pages +use super::metrics::ParquetFileMetrics; use crate::datasource::physical_plan::parquet::ParquetAccessPlan; -use crate::datasource::physical_plan::parquet::StatisticsConverter; use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use arrow::array::BooleanArray; use arrow::{array::ArrayRef, datatypes::SchemaRef}; @@ -26,6 +26,7 @@ use arrow_schema::Schema; use datafusion_common::ScalarValue; use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; use log::{debug, trace}; +use parquet::arrow::arrow_reader::statistics::StatisticsConverter; use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex}; use parquet::format::PageLocation; use parquet::schema::types::SchemaDescriptor; @@ -36,8 +37,6 @@ use parquet::{ use std::collections::HashSet; use std::sync::Arc; -use super::metrics::ParquetFileMetrics; - /// Filters a [`ParquetAccessPlan`] based on the [Parquet PageIndex], if present /// /// It does so by evaluating statistics from the [`ParquetColumnIndex`] and @@ -377,7 +376,7 @@ impl<'a> PagesPruningStatistics<'a> { converter: StatisticsConverter<'a>, parquet_metadata: &'a ParquetMetaData, ) -> Option { - let Some(parquet_column_index) = converter.parquet_index() else { + let Some(parquet_column_index) = converter.parquet_column_index() else { trace!( "Column {:?} not in parquet file, skipping", converter.arrow_field() @@ -432,7 +431,6 @@ impl<'a> PagesPruningStatistics<'a> { Some(vec) } } - impl<'a> PruningStatistics for PagesPruningStatistics<'a> { fn min_values(&self, _column: &datafusion_common::Column) -> Option { match self.converter.data_page_mins( diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 170beb15ead25..6a6910748fc88 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -15,9 +15,13 @@ // specific language governing permissions and limitations // under the License. +use crate::datasource::listing::FileRange; +use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use arrow::{array::ArrayRef, datatypes::Schema}; use arrow_array::BooleanArray; use datafusion_common::{Column, Result, ScalarValue}; +use parquet::arrow::arrow_reader::statistics::StatisticsConverter; +use parquet::arrow::parquet_column; use parquet::basic::Type; use parquet::data_type::Decimal; use parquet::schema::types::SchemaDescriptor; @@ -29,11 +33,7 @@ use parquet::{ use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use crate::datasource::listing::FileRange; -use crate::datasource::physical_plan::parquet::statistics::parquet_column; -use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; - -use super::{ParquetAccessPlan, ParquetFileMetrics, StatisticsConverter}; +use super::{ParquetAccessPlan, ParquetFileMetrics}; /// Reduces the [`ParquetAccessPlan`] based on row group level metadata. /// @@ -356,20 +356,24 @@ impl<'a> RowGroupPruningStatistics<'a> { &'a self, column: &'b Column, ) -> Result> { - StatisticsConverter::try_new(&column.name, self.arrow_schema, self.parquet_schema) + Ok(StatisticsConverter::try_new( + &column.name, + self.arrow_schema, + self.parquet_schema, + )?) 
} } impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { fn min_values(&self, column: &Column) -> Option { self.statistics_converter(column) - .and_then(|c| c.row_group_mins(self.metadata_iter())) + .and_then(|c| Ok(c.row_group_mins(self.metadata_iter())?)) .ok() } fn max_values(&self, column: &Column) -> Option { self.statistics_converter(column) - .and_then(|c| c.row_group_maxes(self.metadata_iter())) + .and_then(|c| Ok(c.row_group_maxes(self.metadata_iter())?)) .ok() } @@ -379,7 +383,7 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { fn null_counts(&self, column: &Column) -> Option { self.statistics_converter(column) - .and_then(|c| c.row_group_null_counts(self.metadata_iter())) + .and_then(|c| Ok(c.row_group_null_counts(self.metadata_iter())?)) .ok() .map(|counts| Arc::new(counts) as ArrayRef) } @@ -387,7 +391,7 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { fn row_counts(&self, column: &Column) -> Option { // row counts are the same for all columns in a row group self.statistics_converter(column) - .and_then(|c| c.row_group_row_counts(self.metadata_iter())) + .and_then(|c| Ok(c.row_group_row_counts(self.metadata_iter())?)) .ok() .flatten() .map(|counts| Arc::new(counts) as ArrayRef) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs deleted file mode 100644 index 11b8f5fc6c79a..0000000000000 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ /dev/null @@ -1,2642 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! [`StatisticsConverter`] to convert statistics in parquet format to arrow [`ArrayRef`]. 
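This entire in-crate module is deleted by the commit in favor of the converter that now ships with the parquet crate. A minimal sketch of the replacement call path, assuming `metadata: ParquetMetaData` and `arrow_schema: Schema` are already in hand (the column name "foo" and the method names mirror the doc examples of the module below and the call sites patched above):

    use parquet::arrow::arrow_reader::statistics::StatisticsConverter;

    // Build a converter for one column; the Arrow and parquet schemas do not
    // have to match exactly.
    let parquet_schema = metadata.file_metadata().schema_descr();
    let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)?;

    // Row-group level statistics come back as Arrow arrays,
    // one entry per row group.
    let mins = converter.row_group_mins(metadata.row_groups().iter())?;
    let maxes = converter.row_group_maxes(metadata.row_groups().iter())?;
    let null_counts = converter.row_group_null_counts(metadata.row_groups().iter())?;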
- -// TODO: potentially move this to arrow-rs: https://github.com/apache/arrow-rs/issues/4328 - -use arrow::array::{ - BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder, -}; -use arrow::datatypes::i256; -use arrow::{array::ArrayRef, datatypes::DataType}; -use arrow_array::{ - new_empty_array, new_null_array, BinaryArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, -}; -use arrow_schema::{Field, FieldRef, Schema, TimeUnit}; -use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result}; -use half::f16; -use parquet::data_type::{ByteArray, FixedLenByteArray}; -use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData}; -use parquet::file::page_index::index::{Index, PageIndex}; -use parquet::file::statistics::Statistics as ParquetStatistics; -use parquet::schema::types::SchemaDescriptor; -use paste::paste; -use std::sync::Arc; - -// Convert the bytes array to i128. -// The endian of the input bytes array must be big-endian. -pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 { - // The bytes array are from parquet file and must be the big-endian. - // The endian is defined by parquet format, and the reference document - // https://github.com/apache/parquet-format/blob/54e53e5d7794d383529dd30746378f19a12afd58/src/main/thrift/parquet.thrift#L66 - i128::from_be_bytes(sign_extend_be::<16>(b)) -} - -// Convert the bytes array to i256. -// The endian of the input bytes array must be big-endian. -pub(crate) fn from_bytes_to_i256(b: &[u8]) -> i256 { - i256::from_be_bytes(sign_extend_be::<32>(b)) -} - -// Convert the bytes array to f16 -pub(crate) fn from_bytes_to_f16(b: &[u8]) -> Option { - match b { - [low, high] => Some(f16::from_be_bytes([*high, *low])), - _ => None, - } -} - -// Copy from arrow-rs -// https://github.com/apache/arrow-rs/blob/198af7a3f4aa20f9bd003209d9f04b0f37bb120e/parquet/src/arrow/buffer/bit_util.rs#L54 -// Convert the byte slice to fixed length byte array with the length of N. -fn sign_extend_be(b: &[u8]) -> [u8; N] { - assert!(b.len() <= N, "Array too large, expected less than {N}"); - let is_negative = (b[0] & 128u8) == 128u8; - let mut result = if is_negative { [255u8; N] } else { [0u8; N] }; - for (d, s) in result.iter_mut().skip(N - b.len()).zip(b) { - *d = *s; - } - result -} - -/// Define an adapter iterator for extracting statistics from an iterator of -/// `ParquetStatistics` -/// -/// -/// Handles checking if the statistics are present and valid with the correct type. -/// -/// Parameters: -/// * `$iterator_type` is the name of the iterator type (e.g. `MinBooleanStatsIterator`) -/// * `$func` is the function to call to get the value (e.g. `min` or `max`) -/// * `$parquet_statistics_type` is the type of the statistics (e.g. `ParquetStatistics::Boolean`) -/// * `$stat_value_type` is the type of the statistics value (e.g. `bool`) -macro_rules! 
make_stats_iterator { - ($iterator_type:ident, $func:ident, $parquet_statistics_type:path, $stat_value_type:ty) => { - /// Maps an iterator of `ParquetStatistics` into an iterator of - /// `&$stat_value_type`` - /// - /// Yielded elements: - /// * Some(stats) if valid - /// * None if the statistics are not present, not valid, or not $stat_value_type - struct $iterator_type<'a, I> - where - I: Iterator>, - { - iter: I, - } - - impl<'a, I> $iterator_type<'a, I> - where - I: Iterator>, - { - /// Create a new iterator to extract the statistics - fn new(iter: I) -> Self { - Self { iter } - } - } - - /// Implement the Iterator trait for the iterator - impl<'a, I> Iterator for $iterator_type<'a, I> - where - I: Iterator>, - { - type Item = Option<&'a $stat_value_type>; - - /// return the next statistics value - fn next(&mut self) -> Option { - let next = self.iter.next(); - next.map(|x| { - x.and_then(|stats| match stats { - $parquet_statistics_type(s) if stats.has_min_max_set() => { - Some(s.$func()) - } - _ => None, - }) - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - } - }; -} - -make_stats_iterator!( - MinBooleanStatsIterator, - min, - ParquetStatistics::Boolean, - bool -); -make_stats_iterator!( - MaxBooleanStatsIterator, - max, - ParquetStatistics::Boolean, - bool -); -make_stats_iterator!(MinInt32StatsIterator, min, ParquetStatistics::Int32, i32); -make_stats_iterator!(MaxInt32StatsIterator, max, ParquetStatistics::Int32, i32); -make_stats_iterator!(MinInt64StatsIterator, min, ParquetStatistics::Int64, i64); -make_stats_iterator!(MaxInt64StatsIterator, max, ParquetStatistics::Int64, i64); -make_stats_iterator!(MinFloatStatsIterator, min, ParquetStatistics::Float, f32); -make_stats_iterator!(MaxFloatStatsIterator, max, ParquetStatistics::Float, f32); -make_stats_iterator!(MinDoubleStatsIterator, min, ParquetStatistics::Double, f64); -make_stats_iterator!(MaxDoubleStatsIterator, max, ParquetStatistics::Double, f64); -make_stats_iterator!( - MinByteArrayStatsIterator, - min_bytes, - ParquetStatistics::ByteArray, - [u8] -); -make_stats_iterator!( - MaxByteArrayStatsIterator, - max_bytes, - ParquetStatistics::ByteArray, - [u8] -); -make_stats_iterator!( - MinFixedLenByteArrayStatsIterator, - min_bytes, - ParquetStatistics::FixedLenByteArray, - [u8] -); -make_stats_iterator!( - MaxFixedLenByteArrayStatsIterator, - max_bytes, - ParquetStatistics::FixedLenByteArray, - [u8] -); - -/// Special iterator adapter for extracting i128 values from from an iterator of -/// `ParquetStatistics` -/// -/// Handles checking if the statistics are present and valid with the correct type. -/// -/// Depending on the parquet file, the statistics for `Decimal128` can be stored as -/// `Int32`, `Int64` or `ByteArray` or `FixedSizeByteArray` :mindblown: -/// -/// This iterator handles all cases, extracting the values -/// and converting it to `stat_value_type`. -/// -/// Parameters: -/// * `$iterator_type` is the name of the iterator type (e.g. `MinBooleanStatsIterator`) -/// * `$func` is the function to call to get the value (e.g. `min` or `max`) -/// * `$bytes_func` is the function to call to get the value as bytes (e.g. `min_bytes` or `max_bytes`) -/// * `$stat_value_type` is the type of the statistics value (e.g. `i128`) -/// * `convert_func` is the function to convert the bytes to stats value (e.g. `from_bytes_to_i128`) -macro_rules! 
make_decimal_stats_iterator { - ($iterator_type:ident, $func:ident, $bytes_func:ident, $stat_value_type:ident, $convert_func: ident) => { - struct $iterator_type<'a, I> - where - I: Iterator>, - { - iter: I, - } - - impl<'a, I> $iterator_type<'a, I> - where - I: Iterator>, - { - fn new(iter: I) -> Self { - Self { iter } - } - } - - impl<'a, I> Iterator for $iterator_type<'a, I> - where - I: Iterator>, - { - type Item = Option<$stat_value_type>; - - fn next(&mut self) -> Option { - let next = self.iter.next(); - next.map(|x| { - x.and_then(|stats| { - if !stats.has_min_max_set() { - return None; - } - match stats { - ParquetStatistics::Int32(s) => { - Some($stat_value_type::from(*s.$func())) - } - ParquetStatistics::Int64(s) => { - Some($stat_value_type::from(*s.$func())) - } - ParquetStatistics::ByteArray(s) => { - Some($convert_func(s.$bytes_func())) - } - ParquetStatistics::FixedLenByteArray(s) => { - Some($convert_func(s.$bytes_func())) - } - _ => None, - } - }) - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - } - }; -} - -make_decimal_stats_iterator!( - MinDecimal128StatsIterator, - min, - min_bytes, - i128, - from_bytes_to_i128 -); -make_decimal_stats_iterator!( - MaxDecimal128StatsIterator, - max, - max_bytes, - i128, - from_bytes_to_i128 -); -make_decimal_stats_iterator!( - MinDecimal256StatsIterator, - min, - min_bytes, - i256, - from_bytes_to_i256 -); -make_decimal_stats_iterator!( - MaxDecimal256StatsIterator, - max, - max_bytes, - i256, - from_bytes_to_i256 -); - -/// Special macro to combine the statistics iterators for min and max using the [`mod@paste`] macro. -/// This is used to avoid repeating the same code for min and max statistics extractions -/// -/// Parameters: -/// stat_type_prefix: The prefix of the statistics iterator type (e.g. `Min` or `Max`) -/// data_type: The data type of the statistics (e.g. `DataType::Int32`) -/// iterator: The iterator of [`ParquetStatistics`] to extract the statistics from. -macro_rules! get_statistics { - ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => { - paste! 
{ - match $data_type { - DataType::Boolean => Ok(Arc::new(BooleanArray::from_iter( - [<$stat_type_prefix BooleanStatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::Int8 => Ok(Arc::new(Int8Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| i8::try_from(*x).ok()) - }), - ))), - DataType::Int16 => Ok(Arc::new(Int16Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| i16::try_from(*x).ok()) - }), - ))), - DataType::Int32 => Ok(Arc::new(Int32Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::Int64 => Ok(Arc::new(Int64Array::from_iter( - [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::UInt8 => Ok(Arc::new(UInt8Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| u8::try_from(*x).ok()) - }), - ))), - DataType::UInt16 => Ok(Arc::new(UInt16Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| u16::try_from(*x).ok()) - }), - ))), - DataType::UInt32 => Ok(Arc::new(UInt32Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.map(|x| *x as u32)), - ))), - DataType::UInt64 => Ok(Arc::new(UInt64Array::from_iter( - [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.map(|x| *x as u64)), - ))), - DataType::Float16 => Ok(Arc::new(Float16Array::from_iter( - [<$stat_type_prefix FixedLenByteArrayStatsIterator>]::new($iterator).map(|x| x.and_then(|x| { - from_bytes_to_f16(x) - })), - ))), - DataType::Float32 => Ok(Arc::new(Float32Array::from_iter( - [<$stat_type_prefix FloatStatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::Float64 => Ok(Arc::new(Float64Array::from_iter( - [<$stat_type_prefix DoubleStatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::Date32 => Ok(Arc::new(Date32Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.copied()), - ))), - DataType::Date64 => Ok(Arc::new(Date64Array::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator) - .map(|x| x.map(|x| i64::from(*x) * 24 * 60 * 60 * 1000)), - ))), - DataType::Timestamp(unit, timezone) =>{ - let iter = [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()); - Ok(match unit { - TimeUnit::Second => Arc::new(TimestampSecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - }) - }, - DataType::Time32(unit) => { - Ok(match unit { - TimeUnit::Second => Arc::new(Time32SecondArray::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.copied()), - )), - TimeUnit::Millisecond => Arc::new(Time32MillisecondArray::from_iter( - [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.copied()), - )), - _ => { - let len = $iterator.count(); - // don't know how to extract statistics, so return a null array - new_null_array($data_type, len) - } - }) - }, - DataType::Time64(unit) => { - Ok(match unit { - TimeUnit::Microsecond => Arc::new(Time64MicrosecondArray::from_iter( - 
[<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()), - )), - TimeUnit::Nanosecond => Arc::new(Time64NanosecondArray::from_iter( - [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()), - )), - _ => { - let len = $iterator.count(); - // don't know how to extract statistics, so return a null array - new_null_array($data_type, len) - } - }) - }, - DataType::Binary => Ok(Arc::new(BinaryArray::from_iter( - [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator) - ))), - DataType::LargeBinary => Ok(Arc::new(LargeBinaryArray::from_iter( - [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator) - ))), - DataType::Utf8 => { - let iterator = [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator); - let mut builder = StringBuilder::new(); - for x in iterator { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - let Ok(x) = std::str::from_utf8(x) else { - log::debug!("Utf8 statistics is a non-UTF8 value, ignoring it."); - builder.append_null(); - continue; - }; - - builder.append_value(x); - } - Ok(Arc::new(builder.finish())) - }, - DataType::LargeUtf8 => { - let iterator = [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator); - let mut builder = LargeStringBuilder::new(); - for x in iterator { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - let Ok(x) = std::str::from_utf8(x) else { - log::debug!("Utf8 statistics is a non-UTF8 value, ignoring it."); - builder.append_null(); - continue; - }; - - builder.append_value(x); - } - Ok(Arc::new(builder.finish())) - }, - DataType::FixedSizeBinary(size) => { - let iterator = [<$stat_type_prefix FixedLenByteArrayStatsIterator>]::new($iterator); - let mut builder = FixedSizeBinaryBuilder::new(*size); - for x in iterator { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - // ignore invalid values - if x.len().try_into() != Ok(*size){ - log::debug!( - "FixedSizeBinary({}) statistics is a binary of size {}, ignoring it.", - size, - x.len(), - ); - builder.append_null(); - continue; - } - - builder.append_value(x).expect("ensure to append successfully here, because size have been checked before"); - } - Ok(Arc::new(builder.finish())) - }, - DataType::Decimal128(precision, scale) => { - let arr = Decimal128Array::from_iter( - [<$stat_type_prefix Decimal128StatsIterator>]::new($iterator) - ).with_precision_and_scale(*precision, *scale)?; - Ok(Arc::new(arr)) - }, - DataType::Decimal256(precision, scale) => { - let arr = Decimal256Array::from_iter( - [<$stat_type_prefix Decimal256StatsIterator>]::new($iterator) - ).with_precision_and_scale(*precision, *scale)?; - Ok(Arc::new(arr)) - }, - DataType::Dictionary(_, value_type) => { - [<$stat_type_prefix:lower _ statistics>](value_type, $iterator) - } - - DataType::Map(_,_) | - DataType::Duration(_) | - DataType::Interval(_) | - DataType::Null | - DataType::BinaryView | - DataType::Utf8View | - DataType::List(_) | - DataType::ListView(_) | - DataType::FixedSizeList(_, _) | - DataType::LargeList(_) | - DataType::LargeListView(_) | - DataType::Struct(_) | - DataType::Union(_, _) | - DataType::RunEndEncoded(_, _) => { - let len = $iterator.count(); - // don't know how to extract statistics, so return a null array - Ok(new_null_array($data_type, len)) - } - }}} -} - -macro_rules! 
make_data_page_stats_iterator { - ($iterator_type: ident, $func: expr, $index_type: path, $stat_value_type: ty) => { - struct $iterator_type<'a, I> - where - I: Iterator, - { - iter: I, - } - - impl<'a, I> $iterator_type<'a, I> - where - I: Iterator, - { - fn new(iter: I) -> Self { - Self { iter } - } - } - - impl<'a, I> Iterator for $iterator_type<'a, I> - where - I: Iterator, - { - type Item = Vec>; - - fn next(&mut self) -> Option { - let next = self.iter.next(); - match next { - Some((len, index)) => match index { - $index_type(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| $func(x)) - .collect::>(), - ), - // No matching `Index` found; - // thus no statistics that can be extracted. - // We return vec![None; len] to effectively - // create an arrow null-array with the length - // corresponding to the number of entries in - // `ParquetOffsetIndex` per row group per column. - _ => Some(vec![None; len]), - }, - _ => None, - } - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - } - }; -} - -make_data_page_stats_iterator!( - MinBooleanDataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::BOOLEAN, - bool -); -make_data_page_stats_iterator!( - MaxBooleanDataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::BOOLEAN, - bool -); -make_data_page_stats_iterator!( - MinInt32DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::INT32, - i32 -); -make_data_page_stats_iterator!( - MaxInt32DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::INT32, - i32 -); -make_data_page_stats_iterator!( - MinInt64DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::INT64, - i64 -); -make_data_page_stats_iterator!( - MaxInt64DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::INT64, - i64 -); -make_data_page_stats_iterator!( - MinFloat16DataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, - FixedLenByteArray -); -make_data_page_stats_iterator!( - MaxFloat16DataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, - FixedLenByteArray -); -make_data_page_stats_iterator!( - MinFloat32DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::FLOAT, - f32 -); -make_data_page_stats_iterator!( - MaxFloat32DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::FLOAT, - f32 -); -make_data_page_stats_iterator!( - MinFloat64DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::DOUBLE, - f64 -); -make_data_page_stats_iterator!( - MaxFloat64DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::DOUBLE, - f64 -); -make_data_page_stats_iterator!( - MinByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::BYTE_ARRAY, - ByteArray -); -make_data_page_stats_iterator!( - MaxByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::BYTE_ARRAY, - ByteArray -); -make_data_page_stats_iterator!( - MaxFixedLenByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, - FixedLenByteArray -); - -make_data_page_stats_iterator!( - MinFixedLenByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, - FixedLenByteArray -); - -macro_rules! 
get_decimal_page_stats_iterator { - ($iterator_type: ident, $func: ident, $stat_value_type: ident, $convert_func: ident) => { - struct $iterator_type<'a, I> - where - I: Iterator, - { - iter: I, - } - - impl<'a, I> $iterator_type<'a, I> - where - I: Iterator, - { - fn new(iter: I) -> Self { - Self { iter } - } - } - - impl<'a, I> Iterator for $iterator_type<'a, I> - where - I: Iterator, - { - type Item = Vec>; - - fn next(&mut self) -> Option { - let next = self.iter.next(); - match next { - Some((len, index)) => match index { - Index::INT32(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| { - x.$func.and_then(|x| Some($stat_value_type::from(x))) - }) - .collect::>(), - ), - Index::INT64(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| { - x.$func.and_then(|x| Some($stat_value_type::from(x))) - }) - .collect::>(), - ), - Index::BYTE_ARRAY(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| { - x.clone() - .$func - .and_then(|x| Some($convert_func(x.data()))) - }) - .collect::>(), - ), - Index::FIXED_LEN_BYTE_ARRAY(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| { - x.clone() - .$func - .and_then(|x| Some($convert_func(x.data()))) - }) - .collect::>(), - ), - _ => Some(vec![None; len]), - }, - _ => None, - } - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - } - }; -} - -get_decimal_page_stats_iterator!( - MinDecimal128DataPageStatsIterator, - min, - i128, - from_bytes_to_i128 -); - -get_decimal_page_stats_iterator!( - MaxDecimal128DataPageStatsIterator, - max, - i128, - from_bytes_to_i128 -); - -get_decimal_page_stats_iterator!( - MinDecimal256DataPageStatsIterator, - min, - i256, - from_bytes_to_i256 -); - -get_decimal_page_stats_iterator!( - MaxDecimal256DataPageStatsIterator, - max, - i256, - from_bytes_to_i256 -); - -macro_rules! get_data_page_statistics { - ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => { - paste! 
{ - match $data_type { - Some(DataType::Boolean) => { - let iterator = [<$stat_type_prefix BooleanDataPageStatsIterator>]::new($iterator); - let mut builder = BooleanBuilder::new(); - for x in iterator { - for x in x.into_iter() { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - builder.append_value(x); - } - } - Ok(Arc::new(builder.finish())) - }, - Some(DataType::UInt8) => Ok(Arc::new( - UInt8Array::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| u8::try_from(x).ok()) - }) - }) - .flatten() - ) - )), - Some(DataType::UInt16) => Ok(Arc::new( - UInt16Array::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| u16::try_from(x).ok()) - }) - }) - .flatten() - ) - )), - Some(DataType::UInt32) => Ok(Arc::new( - UInt32Array::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| Some(x as u32)) - }) - }) - .flatten() - ))), - Some(DataType::UInt64) => Ok(Arc::new( - UInt64Array::from_iter( - [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| Some(x as u64)) - }) - }) - .flatten() - ))), - Some(DataType::Int8) => Ok(Arc::new( - Int8Array::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| i8::try_from(x).ok()) - }) - }) - .flatten() - ) - )), - Some(DataType::Int16) => Ok(Arc::new( - Int16Array::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| i16::try_from(x).ok()) - }) - }) - .flatten() - ) - )), - Some(DataType::Int32) => Ok(Arc::new(Int32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Int64) => Ok(Arc::new(Int64Array::from_iter([<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Float16) => Ok(Arc::new( - Float16Array::from_iter( - [<$stat_type_prefix Float16DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter().map(|x| { - x.and_then(|x| from_bytes_to_f16(x.data())) - }) - }) - .flatten() - ) - )), - Some(DataType::Float32) => Ok(Arc::new(Float32Array::from_iter([<$stat_type_prefix Float32DataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Float64) => Ok(Arc::new(Float64Array::from_iter([<$stat_type_prefix Float64DataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Binary) => Ok(Arc::new(BinaryArray::from_iter([<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::LargeBinary) => Ok(Arc::new(LargeBinaryArray::from_iter([<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Utf8) => { - let mut builder = StringBuilder::new(); - let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator); - for x in iterator { - for x in x.into_iter() { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - let Ok(x) = std::str::from_utf8(x.data()) else { - log::debug!("Utf8 statistics is a non-UTF8 value, ignoring it."); - builder.append_null(); - continue; - }; - - builder.append_value(x); - } - } - Ok(Arc::new(builder.finish())) - }, - Some(DataType::LargeUtf8) => { - let mut builder 
= LargeStringBuilder::new(); - let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator); - for x in iterator { - for x in x.into_iter() { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - let Ok(x) = std::str::from_utf8(x.data()) else { - log::debug!("LargeUtf8 statistics is a non-UTF8 value, ignoring it."); - builder.append_null(); - continue; - }; - - builder.append_value(x); - } - } - Ok(Arc::new(builder.finish())) - }, - Some(DataType::Dictionary(_, value_type)) => { - [<$stat_type_prefix:lower _ page_statistics>](Some(value_type), $iterator) - }, - Some(DataType::Timestamp(unit, timezone)) => { - let iter = [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(); - Ok(match unit { - TimeUnit::Second => Arc::new(TimestampSecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())), - }) - }, - Some(DataType::Date32) => Ok(Arc::new(Date32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))), - Some(DataType::Date64) => Ok( - Arc::new( - Date64Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) - .map(|x| { - x.into_iter() - .map(|x| { - x.and_then(|x| i64::try_from(x).ok()) - }) - .map(|x| x.map(|x| x * 24 * 60 * 60 * 1000)) - }).flatten() - ) - ) - ), - Some(DataType::Decimal128(precision, scale)) => Ok(Arc::new( - Decimal128Array::from_iter([<$stat_type_prefix Decimal128DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), - Some(DataType::Decimal256(precision, scale)) => Ok(Arc::new( - Decimal256Array::from_iter([<$stat_type_prefix Decimal256DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), - Some(DataType::Time32(unit)) => { - Ok(match unit { - TimeUnit::Second => Arc::new(Time32SecondArray::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten(), - )), - TimeUnit::Millisecond => Arc::new(Time32MillisecondArray::from_iter( - [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten(), - )), - _ => { - // don't know how to extract statistics, so return an empty array - new_empty_array(&DataType::Time32(unit.clone())) - } - }) - } - Some(DataType::Time64(unit)) => { - Ok(match unit { - TimeUnit::Microsecond => Arc::new(Time64MicrosecondArray::from_iter( - [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(), - )), - TimeUnit::Nanosecond => Arc::new(Time64NanosecondArray::from_iter( - [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(), - )), - _ => { - // don't know how to extract statistics, so return an empty array - new_empty_array(&DataType::Time64(unit.clone())) - } - }) - }, - Some(DataType::FixedSizeBinary(size)) => { - let mut builder = FixedSizeBinaryBuilder::new(*size); - let iterator = [<$stat_type_prefix FixedLenByteArrayDataPageStatsIterator>]::new($iterator); - for x in iterator { - for x in x.into_iter() { - let Some(x) = x else { - builder.append_null(); // no statistics value - continue; - }; - - if x.len() == *size as usize { - let _ = builder.append_value(x.data()); - } else { 
- log::debug!( - "FixedSizeBinary({}) statistics is a binary of size {}, ignoring it.", - size, - x.len(), - ); - builder.append_null(); - } - } - } - Ok(Arc::new(builder.finish())) - }, - _ => unimplemented!() - } - } - } -} - -/// Lookups up the parquet column by name -/// -/// Returns the parquet column index and the corresponding arrow field -pub(crate) fn parquet_column<'a>( - parquet_schema: &SchemaDescriptor, - arrow_schema: &'a Schema, - name: &str, -) -> Option<(usize, &'a FieldRef)> { - let (root_idx, field) = arrow_schema.fields.find(name)?; - if field.data_type().is_nested() { - // Nested fields are not supported and require non-trivial logic - // to correctly walk the parquet schema accounting for the - // logical type rules - - // - // For example a ListArray could correspond to anything from 1 to 3 levels - // in the parquet schema - return None; - } - - // This could be made more efficient (#TBD) - let parquet_idx = (0..parquet_schema.columns().len()) - .find(|x| parquet_schema.get_column_root_idx(*x) == root_idx)?; - Some((parquet_idx, field)) -} - -/// Extracts the min statistics from an iterator of [`ParquetStatistics`] to an -/// [`ArrayRef`] -/// -/// This is an internal helper -- see [`StatisticsConverter`] for public API -fn min_statistics<'a, I: Iterator>>( - data_type: &DataType, - iterator: I, -) -> Result { - get_statistics!(Min, data_type, iterator) -} - -/// Extracts the max statistics from an iterator of [`ParquetStatistics`] to an [`ArrayRef`] -/// -/// This is an internal helper -- see [`StatisticsConverter`] for public API -fn max_statistics<'a, I: Iterator>>( - data_type: &DataType, - iterator: I, -) -> Result { - get_statistics!(Max, data_type, iterator) -} - -/// Extracts the min statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] -pub(crate) fn min_page_statistics<'a, I>( - data_type: Option<&DataType>, - iterator: I, -) -> Result -where - I: Iterator, -{ - get_data_page_statistics!(Min, data_type, iterator) -} - -/// Extracts the max statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] -pub(crate) fn max_page_statistics<'a, I>( - data_type: Option<&DataType>, - iterator: I, -) -> Result -where - I: Iterator, -{ - get_data_page_statistics!(Max, data_type, iterator) -} - -/// Extracts the null count statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] -/// -/// The returned Array is an [`UInt64Array`] -pub(crate) fn null_counts_page_statistics<'a, I>(iterator: I) -> Result -where - I: Iterator, -{ - let iter = iterator.flat_map(|(len, index)| match index { - Index::NONE => vec![None; len], - Index::BOOLEAN(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::INT32(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::INT64(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::FLOAT(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::DOUBLE(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::BYTE_ARRAY(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - 
.collect::>(), - _ => unimplemented!(), - }); - - Ok(UInt64Array::from_iter(iter)) -} - -/// Extracts Parquet statistics as Arrow arrays -/// -/// This is used to convert Parquet statistics to Arrow arrays, with proper type -/// conversions. This information can be used for pruning parquet files or row -/// groups based on the statistics embedded in parquet files -/// -/// # Schemas -/// -/// The schema of the parquet file and the arrow schema are used to convert the -/// underlying statistics value (stored as a parquet value) into the -/// corresponding Arrow value. For example, Decimals are stored as binary in -/// parquet files. -/// -/// The parquet_schema and arrow_schema do not have to be identical (for -/// example, the columns may be in different orders and one or the other schemas -/// may have additional columns). The function [`parquet_column`] is used to -/// match the column in the parquet file to the column in the arrow schema. -#[derive(Debug)] -pub struct StatisticsConverter<'a> { - /// the index of the matched column in the parquet schema - parquet_index: Option, - /// The field (with data type) of the column in the arrow schema - arrow_field: &'a Field, -} - -impl<'a> StatisticsConverter<'a> { - /// Return the index of the column in the parquet file, if any - pub fn parquet_index(&self) -> Option { - self.parquet_index - } - - /// Return the arrow field of the column in the arrow schema - pub fn arrow_field(&self) -> &'a Field { - self.arrow_field - } - - /// Returns a [`UInt64Array`] with row counts for each row group - /// - /// # Return Value - /// - /// The returned array has no nulls, and has one value for each row group. - /// Each value is the number of rows in the row group. - /// - /// # Example - /// ```no_run - /// # use arrow::datatypes::Schema; - /// # use arrow_array::ArrayRef; - /// # use parquet::file::metadata::ParquetMetaData; - /// # use datafusion::datasource::physical_plan::parquet::StatisticsConverter; - /// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() } - /// # fn get_arrow_schema() -> Schema { unimplemented!() } - /// // Given the metadata for a parquet file and the arrow schema - /// let metadata: ParquetMetaData = get_parquet_metadata(); - /// let arrow_schema: Schema = get_arrow_schema(); - /// let parquet_schema = metadata.file_metadata().schema_descr(); - /// // create a converter - /// let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema) - /// .unwrap(); - /// // get the row counts for each row group - /// let row_counts = converter.row_group_row_counts(metadata - /// .row_groups() - /// .iter() - /// ); - /// ``` - pub fn row_group_row_counts(&self, metadatas: I) -> Result> - where - I: IntoIterator, - { - let Some(_) = self.parquet_index else { - return Ok(None); - }; - - let mut builder = UInt64Array::builder(10); - for metadata in metadatas.into_iter() { - let row_count = metadata.num_rows(); - let row_count: u64 = row_count.try_into().map_err(|e| { - internal_datafusion_err!( - "Parquet row count {row_count} too large to convert to u64: {e}" - ) - })?; - builder.append_value(row_count); - } - Ok(Some(builder.finish())) - } - - /// Create a new `StatisticsConverter` to extract statistics for a column - /// - /// Note if there is no corresponding column in the parquet file, the returned - /// arrays will be null. This can happen if the column is in the arrow - /// schema but not in the parquet schema due to schema evolution. 
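A short sketch of the schema-evolution case described here, assuming `evolved_arrow_schema` contains a column "new_col" that the parquet file does not, with the other bindings as in the earlier sketch:

    // "new_col" exists only in the Arrow schema, so the converter has no
    // matching parquet column; extraction succeeds but yields a null array
    // (one null per row group) of the requested Arrow type, not an error.
    let converter =
        StatisticsConverter::try_new("new_col", &evolved_arrow_schema, parquet_schema)?;
    let mins = converter.row_group_mins(metadata.row_groups().iter())?;
    // mins has one entry per row group and every entry is null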
- /// - /// See example on [`Self::row_group_mins`] for usage - /// - /// # Errors - /// - /// * If the column is not found in the arrow schema - pub fn try_new<'b>( - column_name: &'b str, - arrow_schema: &'a Schema, - parquet_schema: &'a SchemaDescriptor, - ) -> Result { - // ensure the requested column is in the arrow schema - let Some((_idx, arrow_field)) = arrow_schema.column_with_name(column_name) else { - return plan_err!( - "Column '{}' not found in schema for statistics conversion", - column_name - ); - }; - - // find the column in the parquet schema, if not, return a null array - let parquet_index = match parquet_column( - parquet_schema, - arrow_schema, - column_name, - ) { - Some((parquet_idx, matched_field)) => { - // sanity check that matching field matches the arrow field - if matched_field.as_ref() != arrow_field { - return internal_err!( - "Matched column '{:?}' does not match original matched column '{:?}'", - matched_field, - arrow_field - ); - } - Some(parquet_idx) - } - None => None, - }; - - Ok(Self { - parquet_index, - arrow_field, - }) - } - - /// Extract the minimum values from row group statistics in [`RowGroupMetaData`] - /// - /// # Return Value - /// - /// The returned array contains 1 value for each row group, in the same order as `metadatas` - /// - /// Each value is either - /// * the minimum value for the column - /// * a null value, if the statistics can not be extracted - /// - /// Note that a null value does NOT mean the min value was actually - /// `null` it means it the requested statistic is unknown - /// - /// # Errors - /// - /// Reasons for not being able to extract the statistics include: - /// * the column is not present in the parquet file - /// * statistics for the column are not present in the row group - /// * the stored statistic value can not be converted to the requested type - /// - /// # Example - /// ```no_run - /// # use arrow::datatypes::Schema; - /// # use arrow_array::ArrayRef; - /// # use parquet::file::metadata::ParquetMetaData; - /// # use datafusion::datasource::physical_plan::parquet::StatisticsConverter; - /// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() } - /// # fn get_arrow_schema() -> Schema { unimplemented!() } - /// // Given the metadata for a parquet file and the arrow schema - /// let metadata: ParquetMetaData = get_parquet_metadata(); - /// let arrow_schema: Schema = get_arrow_schema(); - /// let parquet_schema = metadata.file_metadata().schema_descr(); - /// // create a converter - /// let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema) - /// .unwrap(); - /// // get the minimum value for the column "foo" in the parquet file - /// let min_values: ArrayRef = converter - /// .row_group_mins(metadata.row_groups().iter()) - /// .unwrap(); - /// ``` - pub fn row_group_mins(&self, metadatas: I) -> Result - where - I: IntoIterator, - { - let data_type = self.arrow_field.data_type(); - - let Some(parquet_index) = self.parquet_index else { - return Ok(self.make_null_array(data_type, metadatas)); - }; - - let iter = metadatas - .into_iter() - .map(|x| x.column(parquet_index).statistics()); - min_statistics(data_type, iter) - } - - /// Extract the maximum values from row group statistics in [`RowGroupMetaData`] - /// - /// See docs on [`Self::row_group_mins`] for details - pub fn row_group_maxes(&self, metadatas: I) -> Result - where - I: IntoIterator, - { - let data_type = self.arrow_field.data_type(); - - let Some(parquet_index) = self.parquet_index else { - return 
Ok(self.make_null_array(data_type, metadatas)); - }; - - let iter = metadatas - .into_iter() - .map(|x| x.column(parquet_index).statistics()); - max_statistics(data_type, iter) - } - - /// Extract the null counts from row group statistics in [`RowGroupMetaData`] - /// - /// See docs on [`Self::row_group_mins`] for details - pub fn row_group_null_counts(&self, metadatas: I) -> Result - where - I: IntoIterator, - { - let Some(parquet_index) = self.parquet_index else { - let num_row_groups = metadatas.into_iter().count(); - return Ok(UInt64Array::from_iter( - std::iter::repeat(None).take(num_row_groups), - )); - }; - - let null_counts = metadatas - .into_iter() - .map(|x| x.column(parquet_index).statistics()) - .map(|s| s.map(|s| s.null_count())); - Ok(UInt64Array::from_iter(null_counts)) - } - - /// Extract the minimum values from Data Page statistics. - /// - /// In Parquet files, in addition to the Column Chunk level statistics - /// (stored for each column for each row group) there are also - /// optional statistics stored for each data page, as part of - /// the [`ParquetColumnIndex`]. - /// - /// Since a single Column Chunk is stored as one or more pages, - /// page level statistics can prune at a finer granularity. - /// - /// However since they are stored in a separate metadata - /// structure ([`Index`]) there is different code to extract them as - /// compared to arrow statistics. - /// - /// # Parameters: - /// - /// * `column_page_index`: The parquet column page indices, read from - /// `ParquetMetaData` column_index - /// - /// * `column_offset_index`: The parquet column offset indices, read from - /// `ParquetMetaData` offset_index - /// - /// * `row_group_indices`: The indices of the row groups, that are used to - /// extract the column page index and offset index on a per row group - /// per column basis. - /// - /// # Return Value - /// - /// The returned array contains 1 value for each `NativeIndex` - /// in the underlying `Index`es, in the same order as they appear - /// in `metadatas`. - /// - /// For example, if there are two `Index`es in `metadatas`: - /// 1. the first having `3` `PageIndex` entries - /// 2. the second having `2` `PageIndex` entries - /// - /// The returned array would have 5 rows. 
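For orientation, a minimal sketch of pulling page-level minimums; the function name is illustrative only, and it assumes `metadata` came from a reader opened with the page index enabled and `converter` was built with `try_new` as above.

```rust
use datafusion::datasource::physical_plan::parquet::StatisticsConverter;
use parquet::file::metadata::ParquetMetaData;

fn page_mins_example(
    metadata: &ParquetMetaData,
    converter: &StatisticsConverter<'_>,
) -> datafusion::error::Result<()> {
    // Page indexes are optional; they are only present if the file has them
    // and the reader was configured to load them.
    let column_index = metadata.column_index().expect("page index not loaded");
    let offset_index = metadata.offset_index().expect("offset index not loaded");

    // one entry per data page, across all requested row groups
    let row_group_indices: Vec<usize> = (0..metadata.num_row_groups()).collect();
    let page_mins =
        converter.data_page_mins(column_index, offset_index, &row_group_indices)?;
    println!("per-page minimums: {page_mins:?}");
    Ok(())
}
```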
- /// - /// Each value is either: - /// * the minimum value for the page - /// * a null value, if the statistics can not be extracted - /// - /// Note that a null value does NOT mean the min value was actually - /// `null` it means it the requested statistic is unknown - /// - /// # Errors - /// - /// Reasons for not being able to extract the statistics include: - /// * the column is not present in the parquet file - /// * statistics for the pages are not present in the row group - /// * the stored statistic value can not be converted to the requested type - pub fn data_page_mins( - &self, - column_page_index: &ParquetColumnIndex, - column_offset_index: &ParquetOffsetIndex, - row_group_indices: I, - ) -> Result - where - I: IntoIterator, - { - let data_type = self.arrow_field.data_type(); - - let Some(parquet_index) = self.parquet_index else { - return Ok(self.make_null_array(data_type, row_group_indices)); - }; - - let iter = row_group_indices.into_iter().map(|rg_index| { - let column_page_index_per_row_group_per_column = - &column_page_index[*rg_index][parquet_index]; - let num_data_pages = &column_offset_index[*rg_index][parquet_index].len(); - - (*num_data_pages, column_page_index_per_row_group_per_column) - }); - - min_page_statistics(Some(data_type), iter) - } - - /// Extract the maximum values from Data Page statistics. - /// - /// See docs on [`Self::data_page_mins`] for details. - pub fn data_page_maxes( - &self, - column_page_index: &ParquetColumnIndex, - column_offset_index: &ParquetOffsetIndex, - row_group_indices: I, - ) -> Result - where - I: IntoIterator, - { - let data_type = self.arrow_field.data_type(); - - let Some(parquet_index) = self.parquet_index else { - return Ok(self.make_null_array(data_type, row_group_indices)); - }; - - let iter = row_group_indices.into_iter().map(|rg_index| { - let column_page_index_per_row_group_per_column = - &column_page_index[*rg_index][parquet_index]; - let num_data_pages = &column_offset_index[*rg_index][parquet_index].len(); - - (*num_data_pages, column_page_index_per_row_group_per_column) - }); - - max_page_statistics(Some(data_type), iter) - } - - /// Extract the null counts from Data Page statistics. - /// - /// The returned Array is an [`UInt64Array`] - /// - /// See docs on [`Self::data_page_mins`] for details. - pub fn data_page_null_counts( - &self, - column_page_index: &ParquetColumnIndex, - column_offset_index: &ParquetOffsetIndex, - row_group_indices: I, - ) -> Result - where - I: IntoIterator, - { - let Some(parquet_index) = self.parquet_index else { - let num_row_groups = row_group_indices.into_iter().count(); - return Ok(UInt64Array::from_iter( - std::iter::repeat(None).take(num_row_groups), - )); - }; - - let iter = row_group_indices.into_iter().map(|rg_index| { - let column_page_index_per_row_group_per_column = - &column_page_index[*rg_index][parquet_index]; - let num_data_pages = &column_offset_index[*rg_index][parquet_index].len(); - - (*num_data_pages, column_page_index_per_row_group_per_column) - }); - null_counts_page_statistics(iter) - } - - /// Returns an [`ArrayRef`] with row counts for each row group. - /// - /// This function iterates over the given row group indexes and computes - /// the row count for each page in the specified column. 
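The per-page arithmetic can be sketched standalone (the numbers are invented for illustration and `per_page_row_counts` is not a function from this file): consecutive differences of each page's `first_row_index`, with the final page running to the end of the row group.

```rust
/// Row counts per page, given each page's starting row and the row group size.
fn per_page_row_counts(first_row_indices: &[u64], rows_in_row_group: u64) -> Vec<u64> {
    // difference between consecutive page start offsets
    let mut counts: Vec<u64> =
        first_row_indices.windows(2).map(|w| w[1] - w[0]).collect();
    // the last page covers everything up to the end of the row group
    if let Some(last) = first_row_indices.last() {
        counts.push(rows_in_row_group - last);
    }
    counts
}

fn main() {
    // Pages starting at rows 0, 4 and 8 in a 10-row row group
    assert_eq!(per_page_row_counts(&[0, 4, 8], 10), vec![4, 4, 2]);
}
```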
- /// - /// # Parameters: - /// - /// * `column_offset_index`: The parquet column offset indices, read from - /// `ParquetMetaData` offset_index - /// - /// * `row_group_metadatas`: The metadata slice of the row groups, read - /// from `ParquetMetaData` row_groups - /// - /// * `row_group_indices`: The indices of the row groups, that are used to - /// extract the column offset index on a per row group per column basis. - /// - /// See docs on [`Self::data_page_mins`] for details. - pub fn data_page_row_counts( - &self, - column_offset_index: &ParquetOffsetIndex, - row_group_metadatas: &'a [RowGroupMetaData], - row_group_indices: I, - ) -> Result> - where - I: IntoIterator, - { - let Some(parquet_index) = self.parquet_index else { - // no matching column found in parquet_index; - // thus we cannot extract page_locations in order to determine - // the row count on a per DataPage basis. - return Ok(None); - }; - - let mut row_count_total = Vec::new(); - for rg_idx in row_group_indices { - let page_locations = &column_offset_index[*rg_idx][parquet_index]; - - let row_count_per_page = page_locations.windows(2).map(|loc| { - Some(loc[1].first_row_index as u64 - loc[0].first_row_index as u64) - }); - - // append the last page row count - let num_rows_in_row_group = &row_group_metadatas[*rg_idx].num_rows(); - let row_count_per_page = row_count_per_page - .chain(std::iter::once(Some( - *num_rows_in_row_group as u64 - - page_locations.last().unwrap().first_row_index as u64, - ))) - .collect::>(); - - row_count_total.extend(row_count_per_page); - } - - Ok(Some(UInt64Array::from_iter(row_count_total))) - } - - /// Returns a null array of data_type with one element per row group - fn make_null_array(&self, data_type: &DataType, metadatas: I) -> ArrayRef - where - I: IntoIterator, - { - // column was in the arrow schema but not in the parquet schema, so return a null array - let num_row_groups = metadatas.into_iter().count(); - new_null_array(data_type, num_row_groups) - } -} - -#[cfg(test)] -mod test { - use super::*; - use arrow::compute::kernels::cast_utils::Parser; - use arrow::datatypes::{i256, Date32Type, Date64Type}; - use arrow_array::{ - new_empty_array, new_null_array, Array, BinaryArray, BooleanArray, Date32Array, - Date64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch, - StringArray, StructArray, TimestampNanosecondArray, - }; - use arrow_schema::{Field, SchemaRef}; - use bytes::Bytes; - use datafusion_common::test_util::parquet_test_data; - use parquet::arrow::arrow_reader::ArrowReaderBuilder; - use parquet::arrow::arrow_writer::ArrowWriter; - use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData}; - use parquet::file::properties::{EnabledStatistics, WriterProperties}; - use std::path::PathBuf; - use std::sync::Arc; - - // TODO error cases (with parquet statistics that are mismatched in expected type) - - #[test] - fn roundtrip_empty() { - let empty_bool_array = new_empty_array(&DataType::Boolean); - Test { - input: empty_bool_array.clone(), - expected_min: empty_bool_array.clone(), - expected_max: empty_bool_array.clone(), - } - .run() - } - - #[test] - fn roundtrip_bool() { - Test { - input: bool_array([ - // row group 1 - Some(true), - None, - Some(true), - // row group 2 - Some(true), - Some(false), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: bool_array([Some(true), Some(false), None]), - expected_max: bool_array([Some(true), Some(true), None]), - } - 
.run() - } - - #[test] - fn roundtrip_int32() { - Test { - input: i32_array([ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(0), - Some(5), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: i32_array([Some(1), Some(0), None]), - expected_max: i32_array([Some(3), Some(5), None]), - } - .run() - } - - #[test] - fn roundtrip_int64() { - Test { - input: i64_array([ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(0), - Some(5), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: i64_array([Some(1), Some(0), None]), - expected_max: i64_array(vec![Some(3), Some(5), None]), - } - .run() - } - - #[test] - fn roundtrip_f32() { - Test { - input: f32_array([ - // row group 1 - Some(1.0), - None, - Some(3.0), - // row group 2 - Some(-1.0), - Some(5.0), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: f32_array([Some(1.0), Some(-1.0), None]), - expected_max: f32_array([Some(3.0), Some(5.0), None]), - } - .run() - } - - #[test] - fn roundtrip_f64() { - Test { - input: f64_array([ - // row group 1 - Some(1.0), - None, - Some(3.0), - // row group 2 - Some(-1.0), - Some(5.0), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: f64_array([Some(1.0), Some(-1.0), None]), - expected_max: f64_array([Some(3.0), Some(5.0), None]), - } - .run() - } - - #[test] - fn roundtrip_timestamp() { - Test { - input: timestamp_seconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - None, - ), - expected_min: timestamp_seconds_array([Some(1), Some(5), None], None), - expected_max: timestamp_seconds_array([Some(3), Some(9), None], None), - } - .run(); - - Test { - input: timestamp_milliseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - None, - ), - expected_min: timestamp_milliseconds_array([Some(1), Some(5), None], None), - expected_max: timestamp_milliseconds_array([Some(3), Some(9), None], None), - } - .run(); - - Test { - input: timestamp_microseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - None, - ), - expected_min: timestamp_microseconds_array([Some(1), Some(5), None], None), - expected_max: timestamp_microseconds_array([Some(3), Some(9), None], None), - } - .run(); - - Test { - input: timestamp_nanoseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - None, - ), - expected_min: timestamp_nanoseconds_array([Some(1), Some(5), None], None), - expected_max: timestamp_nanoseconds_array([Some(3), Some(9), None], None), - } - .run() - } - - #[test] - fn roundtrip_timestamp_timezoned() { - Test { - input: timestamp_seconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - Some("UTC"), - ), - expected_min: timestamp_seconds_array([Some(1), Some(5), None], Some("UTC")), - expected_max: timestamp_seconds_array([Some(3), Some(9), None], Some("UTC")), - } - .run(); - - Test { - input: timestamp_milliseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - Some("UTC"), - ), - 
expected_min: timestamp_milliseconds_array( - [Some(1), Some(5), None], - Some("UTC"), - ), - expected_max: timestamp_milliseconds_array( - [Some(3), Some(9), None], - Some("UTC"), - ), - } - .run(); - - Test { - input: timestamp_microseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - Some("UTC"), - ), - expected_min: timestamp_microseconds_array( - [Some(1), Some(5), None], - Some("UTC"), - ), - expected_max: timestamp_microseconds_array( - [Some(3), Some(9), None], - Some("UTC"), - ), - } - .run(); - - Test { - input: timestamp_nanoseconds_array( - [ - // row group 1 - Some(1), - None, - Some(3), - // row group 2 - Some(9), - Some(5), - None, - // row group 3 - None, - None, - None, - ], - Some("UTC"), - ), - expected_min: timestamp_nanoseconds_array( - [Some(1), Some(5), None], - Some("UTC"), - ), - expected_max: timestamp_nanoseconds_array( - [Some(3), Some(9), None], - Some("UTC"), - ), - } - .run() - } - - #[test] - fn roundtrip_decimal() { - Test { - input: Arc::new( - Decimal128Array::from(vec![ - // row group 1 - Some(100), - None, - Some(22000), - // row group 2 - Some(500000), - Some(330000), - None, - // row group 3 - None, - None, - None, - ]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_min: Arc::new( - Decimal128Array::from(vec![Some(100), Some(330000), None]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_max: Arc::new( - Decimal128Array::from(vec![Some(22000), Some(500000), None]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - } - .run(); - - Test { - input: Arc::new( - Decimal256Array::from(vec![ - // row group 1 - Some(i256::from(100)), - None, - Some(i256::from(22000)), - // row group 2 - Some(i256::MAX), - Some(i256::MIN), - None, - // row group 3 - None, - None, - None, - ]) - .with_precision_and_scale(76, 76) - .unwrap(), - ), - expected_min: Arc::new( - Decimal256Array::from(vec![Some(i256::from(100)), Some(i256::MIN), None]) - .with_precision_and_scale(76, 76) - .unwrap(), - ), - expected_max: Arc::new( - Decimal256Array::from(vec![ - Some(i256::from(22000)), - Some(i256::MAX), - None, - ]) - .with_precision_and_scale(76, 76) - .unwrap(), - ), - } - .run() - } - - #[test] - fn roundtrip_utf8() { - Test { - input: utf8_array([ - // row group 1 - Some("A"), - None, - Some("Q"), - // row group 2 - Some("ZZ"), - Some("AA"), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: utf8_array([Some("A"), Some("AA"), None]), - expected_max: utf8_array([Some("Q"), Some("ZZ"), None]), - } - .run() - } - - #[test] - fn roundtrip_struct() { - let mut test = Test { - input: struct_array(vec![ - // row group 1 - (Some(true), Some(1)), - (None, None), - (Some(true), Some(3)), - // row group 2 - (Some(true), Some(0)), - (Some(false), Some(5)), - (None, None), - // row group 3 - (None, None), - (None, None), - (None, None), - ]), - expected_min: struct_array(vec![ - (Some(true), Some(1)), - (Some(true), Some(0)), - (None, None), - ]), - - expected_max: struct_array(vec![ - (Some(true), Some(3)), - (Some(true), Some(0)), - (None, None), - ]), - }; - // Due to https://github.com/apache/datafusion/issues/8334, - // statistics for struct arrays are not supported - test.expected_min = - new_null_array(test.input.data_type(), test.expected_min.len()); - test.expected_max = - new_null_array(test.input.data_type(), test.expected_min.len()); - test.run() - } - - #[test] - fn roundtrip_binary() { - Test { - input: 
Arc::new(BinaryArray::from_opt_vec(vec![ - // row group 1 - Some(b"A"), - None, - Some(b"Q"), - // row group 2 - Some(b"ZZ"), - Some(b"AA"), - None, - // row group 3 - None, - None, - None, - ])), - expected_min: Arc::new(BinaryArray::from_opt_vec(vec![ - Some(b"A"), - Some(b"AA"), - None, - ])), - expected_max: Arc::new(BinaryArray::from_opt_vec(vec![ - Some(b"Q"), - Some(b"ZZ"), - None, - ])), - } - .run() - } - - #[test] - fn roundtrip_date32() { - Test { - input: date32_array(vec![ - // row group 1 - Some("2021-01-01"), - None, - Some("2021-01-03"), - // row group 2 - Some("2021-01-01"), - Some("2021-01-05"), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: date32_array(vec![ - Some("2021-01-01"), - Some("2021-01-01"), - None, - ]), - expected_max: date32_array(vec![ - Some("2021-01-03"), - Some("2021-01-05"), - None, - ]), - } - .run() - } - - #[test] - fn roundtrip_date64() { - Test { - input: date64_array(vec![ - // row group 1 - Some("2021-01-01"), - None, - Some("2021-01-03"), - // row group 2 - Some("2021-01-01"), - Some("2021-01-05"), - None, - // row group 3 - None, - None, - None, - ]), - expected_min: date64_array(vec![ - Some("2021-01-01"), - Some("2021-01-01"), - None, - ]), - expected_max: date64_array(vec![ - Some("2021-01-03"), - Some("2021-01-05"), - None, - ]), - } - .run() - } - - #[test] - fn roundtrip_large_binary_array() { - let input: Vec> = vec![ - // row group 1 - Some(b"A"), - None, - Some(b"Q"), - // row group 2 - Some(b"ZZ"), - Some(b"AA"), - None, - // row group 3 - None, - None, - None, - ]; - - let expected_min: Vec> = vec![Some(b"A"), Some(b"AA"), None]; - let expected_max: Vec> = vec![Some(b"Q"), Some(b"ZZ"), None]; - - Test { - input: large_binary_array(input), - expected_min: large_binary_array(expected_min), - expected_max: large_binary_array(expected_max), - } - .run(); - } - - #[test] - fn struct_and_non_struct() { - // Ensures that statistics for an array that appears *after* a struct - // array are not wrong - let struct_col = struct_array(vec![ - // row group 1 - (Some(true), Some(1)), - (None, None), - (Some(true), Some(3)), - ]); - let int_col = i32_array([Some(100), Some(200), Some(300)]); - let expected_min = i32_array([Some(100)]); - let expected_max = i32_array(vec![Some(300)]); - - // use a name that shadows a name in the struct column - match struct_col.data_type() { - DataType::Struct(fields) => { - assert_eq!(fields.get(1).unwrap().name(), "int_col") - } - _ => panic!("unexpected data type for struct column"), - }; - - let input_batch = RecordBatch::try_from_iter([ - ("struct_col", struct_col), - ("int_col", int_col), - ]) - .unwrap(); - - let schema = input_batch.schema(); - - let metadata = parquet_metadata(schema.clone(), input_batch); - let parquet_schema = metadata.file_metadata().schema_descr(); - - // read the int_col statistics - let (idx, _) = parquet_column(parquet_schema, &schema, "int_col").unwrap(); - assert_eq!(idx, 2); - - let row_groups = metadata.row_groups(); - let converter = - StatisticsConverter::try_new("int_col", &schema, parquet_schema).unwrap(); - - let min = converter.row_group_mins(row_groups.iter()).unwrap(); - assert_eq!( - &min, - &expected_min, - "Min. Statistics\n\n{}\n\n", - DisplayStats(row_groups) - ); - - let max = converter.row_group_maxes(row_groups.iter()).unwrap(); - assert_eq!( - &max, - &expected_max, - "Max. 
Statistics\n\n{}\n\n", - DisplayStats(row_groups) - ); - } - - #[test] - fn nan_in_stats() { - // /parquet-testing/data/nan_in_stats.parquet - // row_groups: 1 - // "x": Double({min: Some(1.0), max: Some(NaN), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - - TestFile::new("nan_in_stats.parquet") - .with_column(ExpectedColumn { - name: "x", - expected_min: Arc::new(Float64Array::from(vec![Some(1.0)])), - expected_max: Arc::new(Float64Array::from(vec![Some(f64::NAN)])), - }) - .run(); - } - - #[test] - fn alltypes_plain() { - // /parquet-testing/data/datapage_v1-snappy-compressed-checksum.parquet - // row_groups: 1 - // (has no statistics) - TestFile::new("alltypes_plain.parquet") - // No column statistics should be read as NULL, but with the right type - .with_column(ExpectedColumn { - name: "id", - expected_min: i32_array([None]), - expected_max: i32_array([None]), - }) - .with_column(ExpectedColumn { - name: "bool_col", - expected_min: bool_array([None]), - expected_max: bool_array([None]), - }) - .run(); - } - - #[test] - fn alltypes_tiny_pages() { - // /parquet-testing/data/alltypes_tiny_pages.parquet - // row_groups: 1 - // "id": Int32({min: Some(0), max: Some(7299), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "bool_col": Boolean({min: Some(false), max: Some(true), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "tinyint_col": Int32({min: Some(0), max: Some(9), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "smallint_col": Int32({min: Some(0), max: Some(9), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "int_col": Int32({min: Some(0), max: Some(9), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "bigint_col": Int64({min: Some(0), max: Some(90), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "float_col": Float({min: Some(0.0), max: Some(9.9), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "double_col": Double({min: Some(0.0), max: Some(90.89999999999999), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "date_string_col": ByteArray({min: Some(ByteArray { data: "01/01/09" }), max: Some(ByteArray { data: "12/31/10" }), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "string_col": ByteArray({min: Some(ByteArray { data: "0" }), max: Some(ByteArray { data: "9" }), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "timestamp_col": Int96({min: None, max: None, distinct_count: None, null_count: 0, min_max_deprecated: true, min_max_backwards_compatible: true}) - // "year": Int32({min: Some(2009), max: Some(2010), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - // "month": Int32({min: Some(1), max: Some(12), distinct_count: None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: false}) - TestFile::new("alltypes_tiny_pages.parquet") - .with_column(ExpectedColumn { - name: "id", - expected_min: i32_array([Some(0)]), - expected_max: i32_array([Some(7299)]), - }) - 
.with_column(ExpectedColumn { - name: "bool_col", - expected_min: bool_array([Some(false)]), - expected_max: bool_array([Some(true)]), - }) - .with_column(ExpectedColumn { - name: "tinyint_col", - expected_min: i8_array([Some(0)]), - expected_max: i8_array([Some(9)]), - }) - .with_column(ExpectedColumn { - name: "smallint_col", - expected_min: i16_array([Some(0)]), - expected_max: i16_array([Some(9)]), - }) - .with_column(ExpectedColumn { - name: "int_col", - expected_min: i32_array([Some(0)]), - expected_max: i32_array([Some(9)]), - }) - .with_column(ExpectedColumn { - name: "bigint_col", - expected_min: i64_array([Some(0)]), - expected_max: i64_array([Some(90)]), - }) - .with_column(ExpectedColumn { - name: "float_col", - expected_min: f32_array([Some(0.0)]), - expected_max: f32_array([Some(9.9)]), - }) - .with_column(ExpectedColumn { - name: "double_col", - expected_min: f64_array([Some(0.0)]), - expected_max: f64_array([Some(90.89999999999999)]), - }) - .with_column(ExpectedColumn { - name: "date_string_col", - expected_min: utf8_array([Some("01/01/09")]), - expected_max: utf8_array([Some("12/31/10")]), - }) - .with_column(ExpectedColumn { - name: "string_col", - expected_min: utf8_array([Some("0")]), - expected_max: utf8_array([Some("9")]), - }) - // File has no min/max for timestamp_col - .with_column(ExpectedColumn { - name: "timestamp_col", - expected_min: timestamp_nanoseconds_array([None], None), - expected_max: timestamp_nanoseconds_array([None], None), - }) - .with_column(ExpectedColumn { - name: "year", - expected_min: i32_array([Some(2009)]), - expected_max: i32_array([Some(2010)]), - }) - .with_column(ExpectedColumn { - name: "month", - expected_min: i32_array([Some(1)]), - expected_max: i32_array([Some(12)]), - }) - .run(); - } - - #[test] - fn fixed_length_decimal_legacy() { - // /parquet-testing/data/fixed_length_decimal_legacy.parquet - // row_groups: 1 - // "value": FixedLenByteArray({min: Some(FixedLenByteArray(ByteArray { data: Some(ByteBufferPtr { data: b"\0\0\0\0\0\xc8" }) })), max: Some(FixedLenByteArray(ByteArray { data: "\0\0\0\0\t`" })), distinct_count: None, null_count: 0, min_max_deprecated: true, min_max_backwards_compatible: true}) - - TestFile::new("fixed_length_decimal_legacy.parquet") - .with_column(ExpectedColumn { - name: "value", - expected_min: Arc::new( - Decimal128Array::from(vec![Some(200)]) - .with_precision_and_scale(13, 2) - .unwrap(), - ), - expected_max: Arc::new( - Decimal128Array::from(vec![Some(2400)]) - .with_precision_and_scale(13, 2) - .unwrap(), - ), - }) - .run(); - } - - const ROWS_PER_ROW_GROUP: usize = 3; - - /// Writes the input batch into a parquet file, with every every three rows as - /// their own row group, and compares the min/maxes to the expected values - struct Test { - input: ArrayRef, - expected_min: ArrayRef, - expected_max: ArrayRef, - } - - impl Test { - fn run(self) { - let Self { - input, - expected_min, - expected_max, - } = self; - - let input_batch = RecordBatch::try_from_iter([("c1", input)]).unwrap(); - - let schema = input_batch.schema(); - - let metadata = parquet_metadata(schema.clone(), input_batch); - let parquet_schema = metadata.file_metadata().schema_descr(); - - let row_groups = metadata.row_groups(); - - for field in schema.fields() { - if field.data_type().is_nested() { - let lookup = parquet_column(parquet_schema, &schema, field.name()); - assert_eq!(lookup, None); - continue; - } - - let converter = - StatisticsConverter::try_new(field.name(), &schema, parquet_schema) - .unwrap(); - - 
assert_eq!(converter.arrow_field, field.as_ref()); - - let mins = converter.row_group_mins(row_groups.iter()).unwrap(); - assert_eq!( - &mins, - &expected_min, - "Min. Statistics\n\n{}\n\n", - DisplayStats(row_groups) - ); - - let maxes = converter.row_group_maxes(row_groups.iter()).unwrap(); - assert_eq!( - &maxes, - &expected_max, - "Max. Statistics\n\n{}\n\n", - DisplayStats(row_groups) - ); - } - } - } - - /// Write the specified batches out as parquet and return the metadata - fn parquet_metadata(schema: SchemaRef, batch: RecordBatch) -> Arc { - let props = WriterProperties::builder() - .set_statistics_enabled(EnabledStatistics::Chunk) - .set_max_row_group_size(ROWS_PER_ROW_GROUP) - .build(); - - let mut buffer = Vec::new(); - let mut writer = ArrowWriter::try_new(&mut buffer, schema, Some(props)).unwrap(); - writer.write(&batch).unwrap(); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::try_new(Bytes::from(buffer)).unwrap(); - reader.metadata().clone() - } - - /// Formats the statistics nicely for display - struct DisplayStats<'a>(&'a [RowGroupMetaData]); - impl<'a> std::fmt::Display for DisplayStats<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let row_groups = self.0; - writeln!(f, " row_groups: {}", row_groups.len())?; - for rg in row_groups { - for col in rg.columns() { - if let Some(statistics) = col.statistics() { - writeln!(f, " {}: {:?}", col.column_path(), statistics)?; - } - } - } - Ok(()) - } - } - - struct ExpectedColumn { - name: &'static str, - expected_min: ArrayRef, - expected_max: ArrayRef, - } - - /// Reads statistics out of the specified, and compares them to the expected values - struct TestFile { - file_name: &'static str, - expected_columns: Vec, - } - - impl TestFile { - fn new(file_name: &'static str) -> Self { - Self { - file_name, - expected_columns: Vec::new(), - } - } - - fn with_column(mut self, column: ExpectedColumn) -> Self { - self.expected_columns.push(column); - self - } - - /// Reads the specified parquet file and validates that the expected min/max - /// values for the specified columns are as expected. 
- fn run(self) { - let path = PathBuf::from(parquet_test_data()).join(self.file_name); - let file = std::fs::File::open(path).unwrap(); - let reader = ArrowReaderBuilder::try_new(file).unwrap(); - let arrow_schema = reader.schema(); - let metadata = reader.metadata(); - let row_groups = metadata.row_groups(); - let parquet_schema = metadata.file_metadata().schema_descr(); - - for expected_column in self.expected_columns { - let ExpectedColumn { - name, - expected_min, - expected_max, - } = expected_column; - - let converter = - StatisticsConverter::try_new(name, arrow_schema, parquet_schema) - .unwrap(); - let actual_min = converter.row_group_mins(row_groups.iter()).unwrap(); - assert_eq!(&expected_min, &actual_min, "column {name}"); - - let actual_max = converter.row_group_maxes(row_groups.iter()).unwrap(); - assert_eq!(&expected_max, &actual_max, "column {name}"); - } - } - } - - fn bool_array(input: impl IntoIterator>) -> ArrayRef { - let array: BooleanArray = input.into_iter().collect(); - Arc::new(array) - } - - fn i8_array(input: impl IntoIterator>) -> ArrayRef { - let array: Int8Array = input.into_iter().collect(); - Arc::new(array) - } - - fn i16_array(input: impl IntoIterator>) -> ArrayRef { - let array: Int16Array = input.into_iter().collect(); - Arc::new(array) - } - - fn i32_array(input: impl IntoIterator>) -> ArrayRef { - let array: Int32Array = input.into_iter().collect(); - Arc::new(array) - } - - fn i64_array(input: impl IntoIterator>) -> ArrayRef { - let array: Int64Array = input.into_iter().collect(); - Arc::new(array) - } - - fn f32_array(input: impl IntoIterator>) -> ArrayRef { - let array: Float32Array = input.into_iter().collect(); - Arc::new(array) - } - - fn f64_array(input: impl IntoIterator>) -> ArrayRef { - let array: Float64Array = input.into_iter().collect(); - Arc::new(array) - } - - fn timestamp_seconds_array( - input: impl IntoIterator>, - timezone: Option<&str>, - ) -> ArrayRef { - let array: TimestampSecondArray = input.into_iter().collect(); - match timezone { - Some(tz) => Arc::new(array.with_timezone(tz)), - None => Arc::new(array), - } - } - - fn timestamp_milliseconds_array( - input: impl IntoIterator>, - timezone: Option<&str>, - ) -> ArrayRef { - let array: TimestampMillisecondArray = input.into_iter().collect(); - match timezone { - Some(tz) => Arc::new(array.with_timezone(tz)), - None => Arc::new(array), - } - } - - fn timestamp_microseconds_array( - input: impl IntoIterator>, - timezone: Option<&str>, - ) -> ArrayRef { - let array: TimestampMicrosecondArray = input.into_iter().collect(); - match timezone { - Some(tz) => Arc::new(array.with_timezone(tz)), - None => Arc::new(array), - } - } - - fn timestamp_nanoseconds_array( - input: impl IntoIterator>, - timezone: Option<&str>, - ) -> ArrayRef { - let array: TimestampNanosecondArray = input.into_iter().collect(); - match timezone { - Some(tz) => Arc::new(array.with_timezone(tz)), - None => Arc::new(array), - } - } - - fn utf8_array<'a>(input: impl IntoIterator>) -> ArrayRef { - let array: StringArray = input - .into_iter() - .map(|s| s.map(|s| s.to_string())) - .collect(); - Arc::new(array) - } - - // returns a struct array with columns "bool_col" and "int_col" with the specified values - fn struct_array(input: Vec<(Option, Option)>) -> ArrayRef { - let boolean: BooleanArray = input.iter().map(|(b, _i)| b).collect(); - let int: Int32Array = input.iter().map(|(_b, i)| i).collect(); - - let nullable = true; - let struct_array = StructArray::from(vec![ - ( - Arc::new(Field::new("bool_col", 
DataType::Boolean, nullable)), - Arc::new(boolean) as ArrayRef, - ), - ( - Arc::new(Field::new("int_col", DataType::Int32, nullable)), - Arc::new(int) as ArrayRef, - ), - ]); - Arc::new(struct_array) - } - - fn date32_array<'a>(input: impl IntoIterator>) -> ArrayRef { - let array = Date32Array::from( - input - .into_iter() - .map(|s| Date32Type::parse(s.unwrap_or_default())) - .collect::>(), - ); - Arc::new(array) - } - - fn date64_array<'a>(input: impl IntoIterator>) -> ArrayRef { - let array = Date64Array::from( - input - .into_iter() - .map(|s| Date64Type::parse(s.unwrap_or_default())) - .collect::>(), - ); - Arc::new(array) - } - - fn large_binary_array<'a>( - input: impl IntoIterator>, - ) -> ArrayRef { - let array = - LargeBinaryArray::from(input.into_iter().collect::>>()); - - Arc::new(array) - } -} diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs deleted file mode 100644 index 623f321ce152e..0000000000000 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ /dev/null @@ -1,2178 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This file contains an end to end test of extracting statistics from parquet files. -//! 
It writes data into a parquet file, reads statistics and verifies they are correct - -use std::default::Default; -use std::fs::File; -use std::sync::Arc; - -use crate::parquet::{struct_array, Scenario}; -use arrow::compute::kernels::cast_utils::Parser; -use arrow::datatypes::{ - i256, Date32Type, Date64Type, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, -}; -use arrow_array::{ - make_array, new_null_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, - Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, - Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, - TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, -}; -use arrow_schema::{DataType, Field, Schema, TimeUnit}; -use datafusion::datasource::physical_plan::parquet::StatisticsConverter; -use half::f16; -use parquet::arrow::arrow_reader::{ - ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, -}; -use parquet::arrow::ArrowWriter; -use parquet::file::properties::{EnabledStatistics, WriterProperties}; - -use super::make_test_file_rg; - -#[derive(Debug, Default, Clone)] -struct Int64Case { - /// Number of nulls in the column - null_values: usize, - /// Non null values in the range `[no_null_values_start, - /// no_null_values_end]`, one value for each row - no_null_values_start: i64, - no_null_values_end: i64, - /// Number of rows per row group - row_per_group: usize, - /// if specified, overrides default statistics settings - enable_stats: Option, - /// If specified, the number of values in each page - data_page_row_count_limit: Option, -} - -impl Int64Case { - /// Return a record batch with i64 with Null values - /// The first no_null_values_end - no_null_values_start values - /// are non-null with the specified range, the rest are null - fn make_int64_batches_with_null(&self) -> RecordBatch { - let schema = - Arc::new(Schema::new(vec![Field::new("i64", DataType::Int64, true)])); - - let v64: Vec = - (self.no_null_values_start as _..self.no_null_values_end as _).collect(); - - RecordBatch::try_new( - schema, - vec![make_array( - Int64Array::from_iter( - v64.into_iter() - .map(Some) - .chain(std::iter::repeat(None).take(self.null_values)), - ) - .to_data(), - )], - ) - .unwrap() - } - - // Create a parquet file with the specified settings - pub fn build(&self) -> ParquetRecordBatchReaderBuilder { - let batches = vec![self.make_int64_batches_with_null()]; - build_parquet_file( - self.row_per_group, - self.enable_stats, - self.data_page_row_count_limit, - batches, - ) - } -} - -fn build_parquet_file( - row_per_group: usize, - enable_stats: Option, - data_page_row_count_limit: Option, - batches: Vec, -) -> ParquetRecordBatchReaderBuilder { - let mut output_file = tempfile::Builder::new() - .prefix("parquert_statistics_test") - .suffix(".parquet") - .tempfile() - .expect("tempfile creation"); - - let mut builder = WriterProperties::builder().set_max_row_group_size(row_per_group); - if let Some(enable_stats) = enable_stats { - builder = builder.set_statistics_enabled(enable_stats); - } - if let Some(data_page_row_count_limit) = data_page_row_count_limit { - builder = builder.set_data_page_row_count_limit(data_page_row_count_limit); - } - let props = 
builder.build(); - - let schema = batches[0].schema(); - - let mut writer = ArrowWriter::try_new(&mut output_file, schema, Some(props)).unwrap(); - - // if we have a datapage limit send the batches in one at a time to give - // the writer a chance to be split into multiple pages - if data_page_row_count_limit.is_some() { - for batch in &batches { - for i in 0..batch.num_rows() { - writer.write(&batch.slice(i, 1)).expect("writing batch"); - } - } - } else { - for batch in &batches { - writer.write(batch).expect("writing batch"); - } - } - - let _file_meta = writer.close().unwrap(); - - let file = output_file.reopen().unwrap(); - let options = ArrowReaderOptions::new().with_page_index(true); - ArrowReaderBuilder::try_new_with_options(file, options).unwrap() -} - -/// Defines what data to create in a parquet file -#[derive(Debug, Clone, Copy)] -struct TestReader { - /// What data to create in the parquet file - scenario: Scenario, - /// Number of rows per row group - row_per_group: usize, -} - -impl TestReader { - /// Create a parquet file with the specified data, and return a - /// ParquetRecordBatchReaderBuilder opened to that file. - async fn build(self) -> ParquetRecordBatchReaderBuilder { - let TestReader { - scenario, - row_per_group, - } = self; - let file = make_test_file_rg(scenario, row_per_group).await; - - // open the file & get the reader - let file = file.reopen().unwrap(); - let options = ArrowReaderOptions::new().with_page_index(true); - ArrowReaderBuilder::try_new_with_options(file, options).unwrap() - } -} - -/// Which statistics should we check? -#[derive(Clone, Debug, Copy)] -enum Check { - /// Extract and check row group statistics - RowGroup, - /// Extract and check data page statistics - DataPage, - /// Extract and check both row group and data page statistics. - /// - /// Note if a row group contains a single data page, - /// the statistics for row groups and data pages are the same. - Both, -} - -impl Check { - fn row_group(&self) -> bool { - match self { - Self::RowGroup | Self::Both => true, - Self::DataPage => false, - } - } - - fn data_page(&self) -> bool { - match self { - Self::DataPage | Self::Both => true, - Self::RowGroup => false, - } - } -} - -/// Defines a test case for statistics extraction -struct Test<'a> { - /// The parquet file reader - reader: &'a ParquetRecordBatchReaderBuilder, - expected_min: ArrayRef, - expected_max: ArrayRef, - expected_null_counts: UInt64Array, - expected_row_counts: Option, - /// Which column to extract statistics from - column_name: &'static str, - /// What statistics should be checked? 
- check: Check, -} - -impl<'a> Test<'a> { - fn run(self) { - let converter = StatisticsConverter::try_new( - self.column_name, - self.reader.schema(), - self.reader.parquet_schema(), - ) - .unwrap(); - - self.run_checks(converter); - } - - fn run_with_schema(self, schema: &Schema) { - let converter = StatisticsConverter::try_new( - self.column_name, - schema, - self.reader.parquet_schema(), - ) - .unwrap(); - - self.run_checks(converter); - } - - fn run_checks(self, converter: StatisticsConverter) { - let Self { - reader, - expected_min, - expected_max, - expected_null_counts, - expected_row_counts, - column_name, - check, - } = self; - - let row_groups = reader.metadata().row_groups(); - - if check.data_page() { - let column_page_index = reader - .metadata() - .column_index() - .expect("File should have column page indices"); - - let column_offset_index = reader - .metadata() - .offset_index() - .expect("File should have column offset indices"); - - let row_group_indices: Vec<_> = (0..row_groups.len()).collect(); - - let min = converter - .data_page_mins( - column_page_index, - column_offset_index, - &row_group_indices, - ) - .unwrap(); - assert_eq!( - &min, &expected_min, - "{column_name}: Mismatch with expected data page minimums" - ); - - let max = converter - .data_page_maxes( - column_page_index, - column_offset_index, - &row_group_indices, - ) - .unwrap(); - assert_eq!( - &max, &expected_max, - "{column_name}: Mismatch with expected data page maximum" - ); - - let null_counts = converter - .data_page_null_counts( - column_page_index, - column_offset_index, - &row_group_indices, - ) - .unwrap(); - - assert_eq!( - &null_counts, &expected_null_counts, - "{column_name}: Mismatch with expected data page null counts. \ - Actual: {null_counts:?}. Expected: {expected_null_counts:?}" - ); - - let row_counts = converter - .data_page_row_counts(column_offset_index, row_groups, &row_group_indices) - .unwrap(); - assert_eq!( - row_counts, expected_row_counts, - "{column_name}: Mismatch with expected row counts. \ - Actual: {row_counts:?}. Expected: {expected_row_counts:?}" - ); - } - - if check.row_group() { - let min = converter.row_group_mins(row_groups).unwrap(); - assert_eq!( - &min, &expected_min, - "{column_name}: Mismatch with expected minimums" - ); - - let max = converter.row_group_maxes(row_groups).unwrap(); - assert_eq!( - &max, &expected_max, - "{column_name}: Mismatch with expected maximum" - ); - - let null_counts = converter.row_group_null_counts(row_groups).unwrap(); - assert_eq!( - &null_counts, &expected_null_counts, - "{column_name}: Mismatch with expected null counts. \ - Actual: {null_counts:?}. Expected: {expected_null_counts:?}" - ); - - let row_counts = converter - .row_group_row_counts(reader.metadata().row_groups().iter()) - .unwrap(); - assert_eq!( - row_counts, expected_row_counts, - "{column_name}: Mismatch with expected row counts. \ - Actual: {row_counts:?}. Expected: {expected_row_counts:?}" - ); - } - } - - /// Run the test and expect a column not found error - fn run_col_not_found(self) { - let Self { - reader, - expected_min: _, - expected_max: _, - expected_null_counts: _, - expected_row_counts: _, - column_name, - .. 
- } = self; - - let converter = StatisticsConverter::try_new( - column_name, - reader.schema(), - reader.parquet_schema(), - ); - - assert!(converter.is_err()); - } -} - -// TESTS -// -// Remaining cases -// f64::NAN -// - Using truncated statistics ("exact min value" and "exact max value" https://docs.rs/parquet/latest/parquet/file/statistics/enum.Statistics.html#method.max_is_exact) - -#[tokio::test] -async fn test_one_row_group_without_null() { - let reader = Int64Case { - null_values: 0, - no_null_values_start: 4, - no_null_values_end: 7, - row_per_group: 20, - ..Default::default() - } - .build(); - - Test { - reader: &reader, - // min is 4 - expected_min: Arc::new(Int64Array::from(vec![4])), - // max is 6 - expected_max: Arc::new(Int64Array::from(vec![6])), - // no nulls - expected_null_counts: UInt64Array::from(vec![0]), - // 3 rows - expected_row_counts: Some(UInt64Array::from(vec![3])), - column_name: "i64", - check: Check::Both, - } - .run() -} - -#[tokio::test] -async fn test_one_row_group_with_null_and_negative() { - let reader = Int64Case { - null_values: 2, - no_null_values_start: -1, - no_null_values_end: 5, - row_per_group: 20, - ..Default::default() - } - .build(); - - Test { - reader: &reader, - // min is -1 - expected_min: Arc::new(Int64Array::from(vec![-1])), - // max is 4 - expected_max: Arc::new(Int64Array::from(vec![4])), - // 2 nulls - expected_null_counts: UInt64Array::from(vec![2]), - // 8 rows - expected_row_counts: Some(UInt64Array::from(vec![8])), - column_name: "i64", - check: Check::Both, - } - .run() -} - -#[tokio::test] -async fn test_two_row_group_with_null() { - let reader = Int64Case { - null_values: 2, - no_null_values_start: 4, - no_null_values_end: 17, - row_per_group: 10, - ..Default::default() - } - .build(); - - Test { - reader: &reader, - // mins are [4, 14] - expected_min: Arc::new(Int64Array::from(vec![4, 14])), - // maxes are [13, 16] - expected_max: Arc::new(Int64Array::from(vec![13, 16])), - // nulls are [0, 2] - expected_null_counts: UInt64Array::from(vec![0, 2]), - // row counts are [10, 5] - expected_row_counts: Some(UInt64Array::from(vec![10, 5])), - column_name: "i64", - check: Check::Both, - } - .run() -} - -#[tokio::test] -async fn test_two_row_groups_with_all_nulls_in_one() { - let reader = Int64Case { - null_values: 4, - no_null_values_start: -2, - no_null_values_end: 2, - row_per_group: 5, - ..Default::default() - } - .build(); - - Test { - reader: &reader, - // mins are [-2, null] - expected_min: Arc::new(Int64Array::from(vec![Some(-2), None])), - // maxes are [1, null] - expected_max: Arc::new(Int64Array::from(vec![Some(1), None])), - // nulls are [1, 3] - expected_null_counts: UInt64Array::from(vec![1, 3]), - // row counts are [5, 3] - expected_row_counts: Some(UInt64Array::from(vec![5, 3])), - column_name: "i64", - check: Check::Both, - } - .run() -} - -#[tokio::test] -async fn test_multiple_data_pages_nulls_and_negatives() { - let reader = Int64Case { - null_values: 3, - no_null_values_start: -1, - no_null_values_end: 10, - row_per_group: 20, - // limit page row count to 4 - data_page_row_count_limit: Some(4), - enable_stats: Some(EnabledStatistics::Page), - } - .build(); - - // Data layout looks like this: - // - // page 0: [-1, 0, 1, 2] - // page 1: [3, 4, 5, 6] - // page 2: [7, 8, 9, null] - // page 3: [null, null] - Test { - reader: &reader, - expected_min: Arc::new(Int64Array::from(vec![Some(-1), Some(3), Some(7), None])), - expected_max: Arc::new(Int64Array::from(vec![Some(2), Some(6), Some(9), None])), - 
expected_null_counts: UInt64Array::from(vec![0, 0, 1, 2]), - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 2])), - column_name: "i64", - check: Check::DataPage, - } - .run() -} - -#[tokio::test] -async fn test_data_page_stats_with_all_null_page() { - for data_type in &[ - DataType::Boolean, - DataType::UInt64, - DataType::UInt32, - DataType::UInt16, - DataType::UInt8, - DataType::Int64, - DataType::Int32, - DataType::Int16, - DataType::Int8, - DataType::Float16, - DataType::Float32, - DataType::Float64, - DataType::Date32, - DataType::Date64, - DataType::Time32(TimeUnit::Millisecond), - DataType::Time32(TimeUnit::Second), - DataType::Time64(TimeUnit::Microsecond), - DataType::Time64(TimeUnit::Nanosecond), - DataType::Timestamp(TimeUnit::Second, None), - DataType::Timestamp(TimeUnit::Millisecond, None), - DataType::Timestamp(TimeUnit::Microsecond, None), - DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Binary, - DataType::LargeBinary, - DataType::FixedSizeBinary(3), - DataType::Utf8, - DataType::LargeUtf8, - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - DataType::Decimal128(8, 2), // as INT32 - DataType::Decimal128(10, 2), // as INT64 - DataType::Decimal128(20, 2), // as FIXED_LEN_BYTE_ARRAY - DataType::Decimal256(8, 2), // as INT32 - DataType::Decimal256(10, 2), // as INT64 - DataType::Decimal256(20, 2), // as FIXED_LEN_BYTE_ARRAY - ] { - let batch = - RecordBatch::try_from_iter(vec![("col", new_null_array(data_type, 4))]) - .expect("record batch creation"); - - let reader = - build_parquet_file(4, Some(EnabledStatistics::Page), Some(4), vec![batch]); - - let expected_data_type = match data_type { - DataType::Dictionary(_, value_type) => value_type.as_ref(), - _ => data_type, - }; - - // There is one data page with 4 nulls - // The statistics should be present but null - Test { - reader: &reader, - expected_min: new_null_array(expected_data_type, 1), - expected_max: new_null_array(expected_data_type, 1), - expected_null_counts: UInt64Array::from(vec![4]), - expected_row_counts: Some(UInt64Array::from(vec![4])), - column_name: "col", - check: Check::DataPage, - } - .run() - } -} - -/////////////// MORE GENERAL TESTS ////////////////////// -// . Many columns in a file -// . Different data types -// . 
Different row group sizes - -// Four different integer types -#[tokio::test] -async fn test_int_64() { - // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" - let reader = TestReader { - scenario: Scenario::Int, - row_per_group: 5, - } - .build() - .await; - - // since each row has only one data page, the statistics are the same - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Int64Array::from(vec![-5, -4, 0, 5])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Int64Array::from(vec![-1, 0, 4, 9])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "i64", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_int_32() { - // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" - let reader = TestReader { - scenario: Scenario::Int, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Int32Array::from(vec![-5, -4, 0, 5])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Int32Array::from(vec![-1, 0, 4, 9])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "i32", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_int_16() { - // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" - let reader = TestReader { - scenario: Scenario::Int, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Int16Array::from(vec![-5, -4, 0, 5])), // panic here because the actual data is Int32Array - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Int16Array::from(vec![-1, 0, 4, 9])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "i16", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_int_8() { - // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" - let reader = TestReader { - scenario: Scenario::Int, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Int8Array::from(vec![-5, -4, 0, 5])), // panic here because the actual data is Int32Array - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Int8Array::from(vec![-1, 0, 4, 9])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "i8", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_float_16() { - // This creates a parquet files of 1 column named f - let reader = TestReader { - scenario: Scenario::Float16, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Float16Array::from(vec![ - f16::from_f32(-5.), - f16::from_f32(-4.), - f16::from_f32(-0.), - f16::from_f32(5.), - ])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Float16Array::from(vec![ - f16::from_f32(-1.), - f16::from_f32(0.), - f16::from_f32(4.), - f16::from_f32(9.), - 
])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "f", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_float_32() { - // This creates a parquet files of 1 column named f - let reader = TestReader { - scenario: Scenario::Float32, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Float32Array::from(vec![-5., -4., -0., 5.0])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Float32Array::from(vec![-1., 0., 4., 9.])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "f", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_float_64() { - // This creates a parquet files of 1 column named f - let reader = TestReader { - scenario: Scenario::Float64, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Float64Array::from(vec![-5., -4., -0., 5.0])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Float64Array::from(vec![-1., 0., 4., 9.])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "f", - check: Check::Both, - } - .run(); -} - -// timestamp -#[tokio::test] -async fn test_timestamp() { - // This creates a parquet files of 9 columns named "nanos", "nanos_timezoned", "micros", "micros_timezoned", "millis", "millis_timezoned", "seconds", "seconds_timezoned", "names" - // "nanos" --> TimestampNanosecondArray - // "nanos_timezoned" --> TimestampNanosecondArray - // "micros" --> TimestampMicrosecondArray - // "micros_timezoned" --> TimestampMicrosecondArray - // "millis" --> TimestampMillisecondArray - // "millis_timezoned" --> TimestampMillisecondArray - // "seconds" --> TimestampSecondArray - // "seconds_timezoned" --> TimestampSecondArray - // "names" --> StringArray - // - // The file is created by 4 record batches, each has 5 rows. 
- // Since the row group size is set to 5, those 4 batches will go into 4 row groups - // This creates a parquet files of 4 columns named "nanos", "nanos_timezoned", "micros", "micros_timezoned", "millis", "millis_timezoned", "seconds", "seconds_timezoned" - let reader = TestReader { - scenario: Scenario::Timestamps, - row_per_group: 5, - } - .build() - .await; - - let tz = "Pacific/Efate"; - - Test { - reader: &reader, - expected_min: Arc::new(TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-01T01:01:01"), - TimestampNanosecondType::parse("2020-01-01T01:01:11"), - TimestampNanosecondType::parse("2020-01-01T01:11:01"), - TimestampNanosecondType::parse("2020-01-11T01:01:01"), - ])), - expected_max: Arc::new(TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-02T01:01:01"), - TimestampNanosecondType::parse("2020-01-02T01:01:11"), - TimestampNanosecondType::parse("2020-01-02T01:11:01"), - TimestampNanosecondType::parse("2020-01-12T01:01:01"), - ])), - // nulls are [1, 1, 1, 1] - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "nanos", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-01T01:01:01"), - TimestampNanosecondType::parse("2020-01-01T01:01:11"), - TimestampNanosecondType::parse("2020-01-01T01:11:01"), - TimestampNanosecondType::parse("2020-01-11T01:01:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-02T01:01:01"), - TimestampNanosecondType::parse("2020-01-02T01:01:11"), - TimestampNanosecondType::parse("2020-01-02T01:11:01"), - TimestampNanosecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 1, 1, 1] - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "nanos_timezoned", - check: Check::Both, - } - .run(); - - // micros - Test { - reader: &reader, - expected_min: Arc::new(TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-01T01:01:01"), - TimestampMicrosecondType::parse("2020-01-01T01:01:11"), - TimestampMicrosecondType::parse("2020-01-01T01:11:01"), - TimestampMicrosecondType::parse("2020-01-11T01:01:01"), - ])), - expected_max: Arc::new(TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-02T01:01:01"), - TimestampMicrosecondType::parse("2020-01-02T01:01:11"), - TimestampMicrosecondType::parse("2020-01-02T01:11:01"), - TimestampMicrosecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "micros", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-01T01:01:01"), - TimestampMicrosecondType::parse("2020-01-01T01:01:11"), - TimestampMicrosecondType::parse("2020-01-01T01:11:01"), - TimestampMicrosecondType::parse("2020-01-11T01:01:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-02T01:01:01"), - 
TimestampMicrosecondType::parse("2020-01-02T01:01:11"), - TimestampMicrosecondType::parse("2020-01-02T01:11:01"), - TimestampMicrosecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 1, 1, 1] - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "micros_timezoned", - check: Check::Both, - } - .run(); - - // millis - Test { - reader: &reader, - expected_min: Arc::new(TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-01T01:01:01"), - TimestampMillisecondType::parse("2020-01-01T01:01:11"), - TimestampMillisecondType::parse("2020-01-01T01:11:01"), - TimestampMillisecondType::parse("2020-01-11T01:01:01"), - ])), - expected_max: Arc::new(TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-02T01:01:01"), - TimestampMillisecondType::parse("2020-01-02T01:01:11"), - TimestampMillisecondType::parse("2020-01-02T01:11:01"), - TimestampMillisecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "millis", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-01T01:01:01"), - TimestampMillisecondType::parse("2020-01-01T01:01:11"), - TimestampMillisecondType::parse("2020-01-01T01:11:01"), - TimestampMillisecondType::parse("2020-01-11T01:01:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-02T01:01:01"), - TimestampMillisecondType::parse("2020-01-02T01:01:11"), - TimestampMillisecondType::parse("2020-01-02T01:11:01"), - TimestampMillisecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 1, 1, 1] - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "millis_timezoned", - check: Check::Both, - } - .run(); - - // seconds - Test { - reader: &reader, - expected_min: Arc::new(TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-01T01:01:01"), - TimestampSecondType::parse("2020-01-01T01:01:11"), - TimestampSecondType::parse("2020-01-01T01:11:01"), - TimestampSecondType::parse("2020-01-11T01:01:01"), - ])), - expected_max: Arc::new(TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-02T01:01:01"), - TimestampSecondType::parse("2020-01-02T01:01:11"), - TimestampSecondType::parse("2020-01-02T01:11:01"), - TimestampSecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "seconds", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-01T01:01:01"), - TimestampSecondType::parse("2020-01-01T01:01:11"), - TimestampSecondType::parse("2020-01-01T01:11:01"), - TimestampSecondType::parse("2020-01-11T01:01:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-02T01:01:01"), - TimestampSecondType::parse("2020-01-02T01:01:11"), - 
TimestampSecondType::parse("2020-01-02T01:11:01"), - TimestampSecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 1, 1, 1] - expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]), - // row counts are [5, 5, 5, 5] - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "seconds_timezoned", - check: Check::Both, - } - .run(); -} - -// timestamp with different row group sizes -#[tokio::test] -async fn test_timestamp_diff_rg_sizes() { - // This creates a parquet files of 9 columns named "nanos", "nanos_timezoned", "micros", "micros_timezoned", "millis", "millis_timezoned", "seconds", "seconds_timezoned", "names" - // "nanos" --> TimestampNanosecondArray - // "nanos_timezoned" --> TimestampNanosecondArray - // "micros" --> TimestampMicrosecondArray - // "micros_timezoned" --> TimestampMicrosecondArray - // "millis" --> TimestampMillisecondArray - // "millis_timezoned" --> TimestampMillisecondArray - // "seconds" --> TimestampSecondArray - // "seconds_timezoned" --> TimestampSecondArray - // "names" --> StringArray - // - // The file is created by 4 record batches (each has a null row), each has 5 rows but then will be split into 3 row groups with size 8, 8, 4 - let reader = TestReader { - scenario: Scenario::Timestamps, - row_per_group: 8, // note that the row group size is 8 - } - .build() - .await; - - let tz = "Pacific/Efate"; - - Test { - reader: &reader, - expected_min: Arc::new(TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-01T01:01:01"), - TimestampNanosecondType::parse("2020-01-01T01:11:01"), - TimestampNanosecondType::parse("2020-01-11T01:02:01"), - ])), - expected_max: Arc::new(TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-02T01:01:01"), - TimestampNanosecondType::parse("2020-01-11T01:01:01"), - TimestampNanosecondType::parse("2020-01-12T01:01:01"), - ])), - // nulls are [1, 2, 1] - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - // row counts are [8, 8, 4] - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "nanos", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-01T01:01:01"), - TimestampNanosecondType::parse("2020-01-01T01:11:01"), - TimestampNanosecondType::parse("2020-01-11T01:02:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampNanosecondArray::from(vec![ - TimestampNanosecondType::parse("2020-01-02T01:01:01"), - TimestampNanosecondType::parse("2020-01-11T01:01:01"), - TimestampNanosecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 2, 1] - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - // row counts are [8, 8, 4] - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "nanos_timezoned", - check: Check::Both, - } - .run(); - - // micros - Test { - reader: &reader, - expected_min: Arc::new(TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-01T01:01:01"), - TimestampMicrosecondType::parse("2020-01-01T01:11:01"), - TimestampMicrosecondType::parse("2020-01-11T01:02:01"), - ])), - expected_max: Arc::new(TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-02T01:01:01"), - TimestampMicrosecondType::parse("2020-01-11T01:01:01"), - TimestampMicrosecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 
2, 1]), - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "micros", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-01T01:01:01"), - TimestampMicrosecondType::parse("2020-01-01T01:11:01"), - TimestampMicrosecondType::parse("2020-01-11T01:02:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampMicrosecondArray::from(vec![ - TimestampMicrosecondType::parse("2020-01-02T01:01:01"), - TimestampMicrosecondType::parse("2020-01-11T01:01:01"), - TimestampMicrosecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 2, 1] - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - // row counts are [8, 8, 4] - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "micros_timezoned", - check: Check::Both, - } - .run(); - - // millis - Test { - reader: &reader, - expected_min: Arc::new(TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-01T01:01:01"), - TimestampMillisecondType::parse("2020-01-01T01:11:01"), - TimestampMillisecondType::parse("2020-01-11T01:02:01"), - ])), - expected_max: Arc::new(TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-02T01:01:01"), - TimestampMillisecondType::parse("2020-01-11T01:01:01"), - TimestampMillisecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "millis", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-01T01:01:01"), - TimestampMillisecondType::parse("2020-01-01T01:11:01"), - TimestampMillisecondType::parse("2020-01-11T01:02:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampMillisecondArray::from(vec![ - TimestampMillisecondType::parse("2020-01-02T01:01:01"), - TimestampMillisecondType::parse("2020-01-11T01:01:01"), - TimestampMillisecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 2, 1] - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - // row counts are [8, 8, 4] - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "millis_timezoned", - check: Check::Both, - } - .run(); - - // seconds - Test { - reader: &reader, - expected_min: Arc::new(TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-01T01:01:01"), - TimestampSecondType::parse("2020-01-01T01:11:01"), - TimestampSecondType::parse("2020-01-11T01:02:01"), - ])), - expected_max: Arc::new(TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-02T01:01:01"), - TimestampSecondType::parse("2020-01-11T01:01:01"), - TimestampSecondType::parse("2020-01-12T01:01:01"), - ])), - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "seconds", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new( - TimestampSecondArray::from(vec![ - TimestampSecondType::parse("2020-01-01T01:01:01"), - TimestampSecondType::parse("2020-01-01T01:11:01"), - TimestampSecondType::parse("2020-01-11T01:02:01"), - ]) - .with_timezone(tz), - ), - expected_max: Arc::new( - TimestampSecondArray::from(vec![ - 
TimestampSecondType::parse("2020-01-02T01:01:01"), - TimestampSecondType::parse("2020-01-11T01:01:01"), - TimestampSecondType::parse("2020-01-12T01:01:01"), - ]) - .with_timezone(tz), - ), - // nulls are [1, 2, 1] - expected_null_counts: UInt64Array::from(vec![1, 2, 1]), - // row counts are [8, 8, 4] - expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])), - column_name: "seconds_timezoned", - check: Check::Both, - } - .run(); -} - -// date with different row group sizes -#[tokio::test] -async fn test_dates_32_diff_rg_sizes() { - // This creates a parquet files of 3 columns named "date32", "date64", "names" - // "date32" --> Date32Array - // "date64" --> Date64Array - // "names" --> StringArray - // - // The file is created by 4 record batches (each has a null row), each has 5 rows but then will be split into 2 row groups with size 13, 7 - let reader = TestReader { - scenario: Scenario::Dates, - row_per_group: 13, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [2020-01-01, 2020-10-30] - expected_min: Arc::new(Date32Array::from(vec![ - Date32Type::parse("2020-01-01"), - Date32Type::parse("2020-10-30"), - ])), - // maxes are [2020-10-29, 2029-11-12] - expected_max: Arc::new(Date32Array::from(vec![ - Date32Type::parse("2020-10-29"), - Date32Type::parse("2029-11-12"), - ])), - // nulls are [2, 2] - expected_null_counts: UInt64Array::from(vec![2, 2]), - // row counts are [13, 7] - expected_row_counts: Some(UInt64Array::from(vec![13, 7])), - column_name: "date32", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_time32_second_diff_rg_sizes() { - let reader = TestReader { - scenario: Scenario::Time32Second, - row_per_group: 4, - } - .build() - .await; - - // Test for Time32Second column - Test { - reader: &reader, - // Assuming specific minimum and maximum values for demonstration - expected_min: Arc::new(Time32SecondArray::from(vec![18506, 18510, 18514, 18518])), - expected_max: Arc::new(Time32SecondArray::from(vec![18509, 18513, 18517, 18521])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), - column_name: "second", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_time32_millisecond_diff_rg_sizes() { - let reader = TestReader { - scenario: Scenario::Time32Millisecond, - row_per_group: 4, - } - .build() - .await; - - // Test for Time32Millisecond column - Test { - reader: &reader, - // Assuming specific minimum and maximum values for demonstration - expected_min: Arc::new(Time32MillisecondArray::from(vec![ - 3600000, 3600004, 3600008, 3600012, - ])), - expected_max: Arc::new(Time32MillisecondArray::from(vec![ - 3600003, 3600007, 3600011, 3600015, - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), - column_name: "millisecond", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_time64_microsecond_diff_rg_sizes() { - let reader = TestReader { - scenario: Scenario::Time64Microsecond, - row_per_group: 4, - } - .build() - .await; - - // Test for Time64MicroSecond column - Test { - reader: &reader, - // Assuming specific minimum and maximum values for demonstration - expected_min: Arc::new(Time64MicrosecondArray::from(vec![ - 1234567890123, - 1234567890127, - 1234567890131, - 1234567890135, - ])), - expected_max: 
Arc::new(Time64MicrosecondArray::from(vec![ - 1234567890126, - 1234567890130, - 1234567890134, - 1234567890138, - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), - column_name: "microsecond", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_time64_nanosecond_diff_rg_sizes() { - let reader = TestReader { - scenario: Scenario::Time64Nanosecond, - row_per_group: 4, - } - .build() - .await; - - // Test for Time32Second column - Test { - reader: &reader, - // Assuming specific minimum and maximum values for demonstration - expected_min: Arc::new(Time64NanosecondArray::from(vec![ - 987654321012345, - 987654321012349, - 987654321012353, - 987654321012357, - ])), - expected_max: Arc::new(Time64NanosecondArray::from(vec![ - 987654321012348, - 987654321012352, - 987654321012356, - 987654321012360, - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 1 null per row group for simplicity - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])), - column_name: "nanosecond", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_dates_64_diff_rg_sizes() { - // The file is created by 4 record batches (each has a null row), each has 5 rows but then will be split into 2 row groups with size 13, 7 - let reader = TestReader { - scenario: Scenario::Dates, - row_per_group: 13, - } - .build() - .await; - Test { - reader: &reader, - expected_min: Arc::new(Date64Array::from(vec![ - Date64Type::parse("2020-01-01"), - Date64Type::parse("2020-10-30"), - ])), - expected_max: Arc::new(Date64Array::from(vec![ - Date64Type::parse("2020-10-29"), - Date64Type::parse("2029-11-12"), - ])), - expected_null_counts: UInt64Array::from(vec![2, 2]), - expected_row_counts: Some(UInt64Array::from(vec![13, 7])), - column_name: "date64", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_uint() { - // This creates a parquet files of 4 columns named "u8", "u16", "u32", "u64" - // "u8" --> UInt8Array - // "u16" --> UInt16Array - // "u32" --> UInt32Array - // "u64" --> UInt64Array - - // The file is created by 4 record batches (each has a null row), each has 5 rows but then will be split into 5 row groups with size 4 - let reader = TestReader { - scenario: Scenario::UInt, - row_per_group: 4, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(UInt8Array::from(vec![0, 1, 4, 7, 251])), - expected_max: Arc::new(UInt8Array::from(vec![3, 4, 6, 250, 254])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])), - column_name: "u8", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(UInt16Array::from(vec![0, 1, 4, 7, 251])), - expected_max: Arc::new(UInt16Array::from(vec![3, 4, 6, 250, 254])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])), - column_name: "u16", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(UInt32Array::from(vec![0, 1, 4, 7, 251])), - expected_max: Arc::new(UInt32Array::from(vec![3, 4, 6, 250, 254])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])), - column_name: "u32", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: 
Arc::new(UInt64Array::from(vec![0, 1, 4, 7, 251])), - expected_max: Arc::new(UInt64Array::from(vec![3, 4, 6, 250, 254])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])), - column_name: "u64", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_int32_range() { - // This creates a parquet file of 1 column "i" - // file has 2 record batches, each has 2 rows. They will be saved into one row group - let reader = TestReader { - scenario: Scenario::Int32Range, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(Int32Array::from(vec![0])), - expected_max: Arc::new(Int32Array::from(vec![300000])), - expected_null_counts: UInt64Array::from(vec![0]), - expected_row_counts: Some(UInt64Array::from(vec![4])), - column_name: "i", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_uint32_range() { - // This creates a parquet file of 1 column "u" - // file has 2 record batches, each has 2 rows. They will be saved into one row group - let reader = TestReader { - scenario: Scenario::UInt32Range, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(UInt32Array::from(vec![0])), - expected_max: Arc::new(UInt32Array::from(vec![300000])), - expected_null_counts: UInt64Array::from(vec![0]), - expected_row_counts: Some(UInt64Array::from(vec![4])), - column_name: "u", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_numeric_limits_unsigned() { - // file has 7 rows, 2 row groups: one with 5 rows, one with 2 rows. - let reader = TestReader { - scenario: Scenario::NumericLimits, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(UInt8Array::from(vec![u8::MIN, 100])), - expected_max: Arc::new(UInt8Array::from(vec![100, u8::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "u8", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(UInt16Array::from(vec![u16::MIN, 100])), - expected_max: Arc::new(UInt16Array::from(vec![100, u16::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "u16", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(UInt32Array::from(vec![u32::MIN, 100])), - expected_max: Arc::new(UInt32Array::from(vec![100, u32::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "u32", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(UInt64Array::from(vec![u64::MIN, 100])), - expected_max: Arc::new(UInt64Array::from(vec![100, u64::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "u64", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_numeric_limits_signed() { - // file has 7 rows, 2 row groups: one with 5 rows, one with 2 rows. 
- let reader = TestReader { - scenario: Scenario::NumericLimits, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(Int8Array::from(vec![i8::MIN, -100])), - expected_max: Arc::new(Int8Array::from(vec![100, i8::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "i8", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Int16Array::from(vec![i16::MIN, -100])), - expected_max: Arc::new(Int16Array::from(vec![100, i16::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "i16", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Int32Array::from(vec![i32::MIN, -100])), - expected_max: Arc::new(Int32Array::from(vec![100, i32::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "i32", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Int64Array::from(vec![i64::MIN, -100])), - expected_max: Arc::new(Int64Array::from(vec![100, i64::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "i64", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_numeric_limits_float() { - // file has 7 rows, 2 row groups: one with 5 rows, one with 2 rows. - let reader = TestReader { - scenario: Scenario::NumericLimits, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(Float32Array::from(vec![f32::MIN, -100.0])), - expected_max: Arc::new(Float32Array::from(vec![100.0, f32::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "f32", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Float64Array::from(vec![f64::MIN, -100.0])), - expected_max: Arc::new(Float64Array::from(vec![100.0, f64::MAX])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "f64", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Float32Array::from(vec![-1.0, -100.0])), - expected_max: Arc::new(Float32Array::from(vec![100.0, -100.0])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "f32_nan", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Float64Array::from(vec![-1.0, -100.0])), - expected_max: Arc::new(Float64Array::from(vec![100.0, -100.0])), - expected_null_counts: UInt64Array::from(vec![0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "f64_nan", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_float64() { - // This creates a parquet file of 1 column "f" - // file has 4 record batches, each has 5 rows. 
They will be saved into 4 row groups - let reader = TestReader { - scenario: Scenario::Float64, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(Float64Array::from(vec![-5.0, -4.0, -0.0, 5.0])), - expected_max: Arc::new(Float64Array::from(vec![-1.0, 0.0, 4.0, 9.0])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "f", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_float16() { - // This creates a parquet file of 1 column "f" - // file has 4 record batches, each has 5 rows. They will be saved into 4 row groups - let reader = TestReader { - scenario: Scenario::Float16, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(Float16Array::from( - vec![-5.0, -4.0, -0.0, 5.0] - .into_iter() - .map(f16::from_f32) - .collect::>(), - )), - expected_max: Arc::new(Float16Array::from( - vec![-1.0, 0.0, 4.0, 9.0] - .into_iter() - .map(f16::from_f32) - .collect::>(), - )), - expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])), - column_name: "f", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_decimal() { - // This creates a parquet file of 1 column "decimal_col" with decimal data type and precision 9, scale 2 - // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups - let reader = TestReader { - scenario: Scenario::Decimal, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new( - Decimal128Array::from(vec![100, -500, 2000]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_max: Arc::new( - Decimal128Array::from(vec![600, 600, 6000]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "decimal_col", - check: Check::Both, - } - .run(); -} -#[tokio::test] -async fn test_decimal_256() { - // This creates a parquet file of 1 column "decimal256_col" with decimal data type and precision 9, scale 2 - // file has 3 record batches, each has 5 rows. 
They will be saved into 3 row groups - let reader = TestReader { - scenario: Scenario::Decimal256, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new( - Decimal256Array::from(vec![ - i256::from(100), - i256::from(-500), - i256::from(2000), - ]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_max: Arc::new( - Decimal256Array::from(vec![ - i256::from(600), - i256::from(600), - i256::from(6000), - ]) - .with_precision_and_scale(9, 2) - .unwrap(), - ), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "decimal256_col", - check: Check::Both, - } - .run(); -} -#[tokio::test] -async fn test_dictionary() { - let reader = TestReader { - scenario: Scenario::Dictionary, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])), - expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "string_dict_i8", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec!["abc", "aaa"])), - expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "string_dict_i32", - check: Check::Both, - } - .run(); - - Test { - reader: &reader, - expected_min: Arc::new(Int64Array::from(vec![-100, 0])), - expected_max: Arc::new(Int64Array::from(vec![0, 100])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 2])), - column_name: "int_dict_i8", - check: Check::Both, - } - .run(); -} - -#[tokio::test] -async fn test_byte() { - // This creates a parquet file of 5 columns - // "name" - // "service_string" - // "service_binary" - // "service_fixedsize" - // "service_large_binary" - - // file has 3 record batches, each has 5 rows. 
They will be saved into 3 row groups - let reader = TestReader { - scenario: Scenario::ByteArray, - row_per_group: 5, - } - .build() - .await; - - // column "name" - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec![ - "all frontends", - "mixed", - "all backends", - ])), - expected_max: Arc::new(StringArray::from(vec![ - "all frontends", - "mixed", - "all backends", - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "name", - check: Check::Both, - } - .run(); - - // column "service_string" - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec![ - "frontend five", - "backend one", - "backend eight", - ])), - expected_max: Arc::new(StringArray::from(vec![ - "frontend two", - "frontend six", - "backend six", - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "service_string", - check: Check::Both, - } - .run(); - - // column "service_binary" - - let expected_service_binary_min_values: Vec<&[u8]> = - vec![b"frontend five", b"backend one", b"backend eight"]; - - let expected_service_binary_max_values: Vec<&[u8]> = - vec![b"frontend two", b"frontend six", b"backend six"]; - - Test { - reader: &reader, - expected_min: Arc::new(BinaryArray::from(expected_service_binary_min_values)), - expected_max: Arc::new(BinaryArray::from(expected_service_binary_max_values)), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "service_binary", - check: Check::Both, - } - .run(); - - // column "service_fixedsize" - // b"fe1", b"be1", b"be4" - let min_input = vec![vec![102, 101, 49], vec![98, 101, 49], vec![98, 101, 52]]; - // b"fe5", b"fe6", b"be8" - let max_input = vec![vec![102, 101, 55], vec![102, 101, 54], vec![98, 101, 56]]; - - Test { - reader: &reader, - expected_min: Arc::new( - FixedSizeBinaryArray::try_from_iter(min_input.into_iter()).unwrap(), - ), - expected_max: Arc::new( - FixedSizeBinaryArray::try_from_iter(max_input.into_iter()).unwrap(), - ), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "service_fixedsize", - check: Check::Both, - } - .run(); - - let expected_service_large_binary_min_values: Vec<&[u8]> = - vec![b"frontend five", b"backend one", b"backend eight"]; - - let expected_service_large_binary_max_values: Vec<&[u8]> = - vec![b"frontend two", b"frontend six", b"backend six"]; - - Test { - reader: &reader, - expected_min: Arc::new(LargeBinaryArray::from( - expected_service_large_binary_min_values, - )), - expected_max: Arc::new(LargeBinaryArray::from( - expected_service_large_binary_max_values, - )), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "service_large_binary", - check: Check::Both, - } - .run(); -} - -// PeriodsInColumnNames -#[tokio::test] -async fn test_period_in_column_names() { - // This creates a parquet file of 2 columns "name" and "service.name" - // file has 3 record batches, each has 5 rows. 
They will be saved into 3 row groups - let reader = TestReader { - scenario: Scenario::PeriodsInColumnNames, - row_per_group: 5, - } - .build() - .await; - - // column "name" - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec![ - "HTTP GET / DISPATCH", - "HTTP PUT / DISPATCH", - "HTTP GET / DISPATCH", - ])), - expected_max: Arc::new(StringArray::from(vec![ - "HTTP GET / DISPATCH", - "HTTP PUT / DISPATCH", - "HTTP GET / DISPATCH", - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "name", - check: Check::Both, - } - .run(); - - // column "service.name" - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec!["frontend", "backend", "backend"])), - expected_max: Arc::new(StringArray::from(vec![ - "frontend", "frontend", "backend", - ])), - expected_null_counts: UInt64Array::from(vec![0, 0, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "service.name", - check: Check::Both, - } - .run(); -} - -// Boolean -#[tokio::test] -async fn test_boolean() { - // This creates a parquet files of 1 column named "bool" - // The file is created by 2 record batches each has 5 rows --> 2 row groups - let reader = TestReader { - scenario: Scenario::Boolean, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - expected_min: Arc::new(BooleanArray::from(vec![false, false])), - expected_max: Arc::new(BooleanArray::from(vec![true, false])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5])), - column_name: "bool", - check: Check::Both, - } - .run(); -} - -// struct array -// BUG -// https://github.com/apache/datafusion/issues/10609 -// Note that: since I have not worked on struct array before, there may be a bug in the test code rather than the real bug in the code -#[ignore] -#[tokio::test] -async fn test_struct() { - // This creates a parquet files of 1 column named "struct" - // The file is created by 1 record batch with 3 rows in the struct array - let reader = TestReader { - scenario: Scenario::StructArray, - row_per_group: 5, - } - .build() - .await; - Test { - reader: &reader, - expected_min: Arc::new(struct_array(vec![(Some(1), Some(6.0), Some(12.0))])), - expected_max: Arc::new(struct_array(vec![(Some(2), Some(8.5), Some(14.0))])), - expected_null_counts: UInt64Array::from(vec![0]), - expected_row_counts: Some(UInt64Array::from(vec![3])), - column_name: "struct", - check: Check::RowGroup, - } - .run(); -} - -// UTF8 -#[tokio::test] -async fn test_utf8() { - let reader = TestReader { - scenario: Scenario::UTF8, - row_per_group: 5, - } - .build() - .await; - - // test for utf8 - Test { - reader: &reader, - expected_min: Arc::new(StringArray::from(vec!["a", "e"])), - expected_max: Arc::new(StringArray::from(vec!["d", "i"])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5])), - column_name: "utf8", - check: Check::Both, - } - .run(); - - // test for large_utf8 - Test { - reader: &reader, - expected_min: Arc::new(LargeStringArray::from(vec!["a", "e"])), - expected_max: Arc::new(LargeStringArray::from(vec!["d", "i"])), - expected_null_counts: UInt64Array::from(vec![1, 0]), - expected_row_counts: Some(UInt64Array::from(vec![5, 5])), - column_name: "large_utf8", - check: Check::Both, - } - .run(); -} - -////// Files with missing statistics /////// - -#[tokio::test] -async fn 
test_missing_statistics() { - let reader = Int64Case { - null_values: 0, - no_null_values_start: 4, - no_null_values_end: 7, - row_per_group: 5, - enable_stats: Some(EnabledStatistics::None), - ..Default::default() - } - .build(); - - Test { - reader: &reader, - expected_min: Arc::new(Int64Array::from(vec![None])), - expected_max: Arc::new(Int64Array::from(vec![None])), - expected_null_counts: UInt64Array::from(vec![None]), - expected_row_counts: Some(UInt64Array::from(vec![3])), // still has row count statistics - column_name: "i64", - check: Check::Both, - } - .run(); -} - -/////// NEGATIVE TESTS /////// -// column not found -#[tokio::test] -async fn test_column_not_found() { - let reader = TestReader { - scenario: Scenario::Dates, - row_per_group: 5, - } - .build() - .await; - Test { - reader: &reader, - expected_min: Arc::new(Int64Array::from(vec![18262, 18565])), - expected_max: Arc::new(Int64Array::from(vec![18564, 21865])), - expected_null_counts: UInt64Array::from(vec![2, 2]), - expected_row_counts: Some(UInt64Array::from(vec![13, 7])), - column_name: "not_a_column", - check: Check::Both, - } - .run_col_not_found(); -} - -#[tokio::test] -async fn test_column_non_existent() { - // Create a schema with an additional column - // that will not have a matching parquet index - let schema = Arc::new(Schema::new(vec![ - Field::new("i8", DataType::Int8, true), - Field::new("i16", DataType::Int16, true), - Field::new("i32", DataType::Int32, true), - Field::new("i64", DataType::Int64, true), - Field::new("i_do_not_exist", DataType::Int64, true), - ])); - - let reader = TestReader { - scenario: Scenario::Int, - row_per_group: 5, - } - .build() - .await; - - Test { - reader: &reader, - // mins are [-5, -4, 0, 5] - expected_min: Arc::new(Int64Array::from(vec![None, None, None, None])), - // maxes are [-1, 0, 4, 9] - expected_max: Arc::new(Int64Array::from(vec![None, None, None, None])), - // nulls are [0, 0, 0, 0] - expected_null_counts: UInt64Array::from(vec![None, None, None, None]), - // row counts are [5, 5, 5, 5] - expected_row_counts: None, - column_name: "i_do_not_exist", - check: Check::Both, - } - .run_with_schema(&schema); -} diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 1b68a4aa4eb36..60a8dd4007865 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -18,19 +18,15 @@ //! 
Parquet integration tests use crate::parquet::utils::MetricsFinder; use arrow::array::Decimal128Array; -use arrow::datatypes::i256; use arrow::{ array::{ - make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, - Decimal256Array, DictionaryArray, FixedSizeBinaryArray, Float16Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - LargeBinaryArray, LargeStringArray, StringArray, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + make_array, Array, ArrayRef, BinaryArray, Date32Array, Date64Array, + FixedSizeBinaryArray, Float64Array, Int16Array, Int32Array, Int64Array, + Int8Array, LargeBinaryArray, LargeStringArray, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }, - datatypes::{DataType, Field, Int32Type, Int8Type, Schema, TimeUnit}, + datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, util::pretty::pretty_format_batches, }; @@ -41,13 +37,11 @@ use datafusion::{ prelude::{ParquetReadOptions, SessionConfig, SessionContext}, }; use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder}; -use half::f16; use parquet::arrow::ArrowWriter; use parquet::file::properties::{EnabledStatistics, WriterProperties}; use std::sync::Arc; use tempfile::NamedTempFile; -mod arrow_statistics; mod custom_reader; // Don't run on windows as tempfiles don't seem to work the same #[cfg(not(target_os = "windows"))] @@ -75,37 +69,23 @@ fn init() { /// What data to use #[derive(Debug, Clone, Copy)] enum Scenario { - Boolean, Timestamps, Dates, Int, Int32Range, UInt, UInt32Range, - Time32Second, - Time32Millisecond, - Time64Nanosecond, - Time64Microsecond, - /// 7 Rows, for each i8, i16, i32, i64, u8, u16, u32, u64, f32, f64 - /// -MIN, -100, -1, 0, 1, 100, MAX - NumericLimits, - Float16, - Float32, Float64, Decimal, - Decimal256, DecimalBloomFilterInt32, DecimalBloomFilterInt64, DecimalLargePrecision, DecimalLargePrecisionBloomFilter, /// StringArray, BinaryArray, FixedSizeBinaryArray ByteArray, - /// DictionaryArray - Dictionary, PeriodsInColumnNames, WithNullValues, WithNullValuesPageLevel, - StructArray, UTF8, } @@ -321,16 +301,6 @@ impl ContextWithParquet { } } -fn make_boolean_batch(v: Vec>) -> RecordBatch { - let schema = Arc::new(Schema::new(vec![Field::new( - "bool", - DataType::Boolean, - true, - )])); - let array = Arc::new(BooleanArray::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array.clone()]).unwrap() -} - /// Return record batch with a few rows of data for all of the supported timestamp types /// values with the specified offset /// @@ -484,55 +454,6 @@ fn make_int_batches(start: i8, end: i8) -> RecordBatch { .unwrap() } -/// Return record batch with Time32Second, Time32Millisecond sequences -fn make_time32_batches(scenario: Scenario, v: Vec) -> RecordBatch { - match scenario { - Scenario::Time32Second => { - let schema = Arc::new(Schema::new(vec![Field::new( - "second", - DataType::Time32(TimeUnit::Second), - true, - )])); - let array = Arc::new(Time32SecondArray::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array]).unwrap() - } - Scenario::Time32Millisecond => { - let schema = Arc::new(Schema::new(vec![Field::new( - "millisecond", - 
DataType::Time32(TimeUnit::Millisecond), - true, - )])); - let array = Arc::new(Time32MillisecondArray::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array]).unwrap() - } - _ => panic!("Unsupported scenario for Time32"), - } -} - -/// Return record batch with Time64Microsecond, Time64Nanosecond sequences -fn make_time64_batches(scenario: Scenario, v: Vec) -> RecordBatch { - match scenario { - Scenario::Time64Microsecond => { - let schema = Arc::new(Schema::new(vec![Field::new( - "microsecond", - DataType::Time64(TimeUnit::Microsecond), - true, - )])); - let array = Arc::new(Time64MicrosecondArray::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array]).unwrap() - } - Scenario::Time64Nanosecond => { - let schema = Arc::new(Schema::new(vec![Field::new( - "nanosecond", - DataType::Time64(TimeUnit::Nanosecond), - true, - )])); - let array = Arc::new(Time64NanosecondArray::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array]).unwrap() - } - _ => panic!("Unsupported scenario for Time64"), - } -} /// Return record batch with u8, u16, u32, and u64 sequences /// /// Columns are named @@ -587,18 +508,6 @@ fn make_f64_batch(v: Vec) -> RecordBatch { RecordBatch::try_new(schema, vec![array.clone()]).unwrap() } -fn make_f32_batch(v: Vec) -> RecordBatch { - let schema = Arc::new(Schema::new(vec![Field::new("f", DataType::Float32, true)])); - let array = Arc::new(Float32Array::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array.clone()]).unwrap() -} - -fn make_f16_batch(v: Vec) -> RecordBatch { - let schema = Arc::new(Schema::new(vec![Field::new("f", DataType::Float16, true)])); - let array = Arc::new(Float16Array::from(v)) as ArrayRef; - RecordBatch::try_new(schema, vec![array.clone()]).unwrap() -} - /// Return record batch with decimal vector /// /// Columns are named @@ -617,24 +526,6 @@ fn make_decimal_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { RecordBatch::try_new(schema, vec![array.clone()]).unwrap() } -/// Return record batch with decimal256 vector -/// -/// Columns are named -/// "decimal256_col" -> Decimal256Array -fn make_decimal256_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { - let schema = Arc::new(Schema::new(vec![Field::new( - "decimal256_col", - DataType::Decimal256(precision, scale), - true, - )])); - let array = Arc::new( - Decimal256Array::from(v) - .with_precision_and_scale(precision, scale) - .unwrap(), - ) as ArrayRef; - RecordBatch::try_new(schema, vec![array]).unwrap() -} - /// Return record batch with a few rows of data for all of the supported date /// types with the specified offset (in days) /// @@ -843,39 +734,6 @@ fn make_int_batches_with_null( .unwrap() } -fn make_numeric_limit_batch() -> RecordBatch { - let i8 = Int8Array::from(vec![i8::MIN, 100, -1, 0, 1, -100, i8::MAX]); - let i16 = Int16Array::from(vec![i16::MIN, 100, -1, 0, 1, -100, i16::MAX]); - let i32 = Int32Array::from(vec![i32::MIN, 100, -1, 0, 1, -100, i32::MAX]); - let i64 = Int64Array::from(vec![i64::MIN, 100, -1, 0, 1, -100, i64::MAX]); - let u8 = UInt8Array::from(vec![u8::MIN, 100, 1, 0, 1, 100, u8::MAX]); - let u16 = UInt16Array::from(vec![u16::MIN, 100, 1, 0, 1, 100, u16::MAX]); - let u32 = UInt32Array::from(vec![u32::MIN, 100, 1, 0, 1, 100, u32::MAX]); - let u64 = UInt64Array::from(vec![u64::MIN, 100, 1, 0, 1, 100, u64::MAX]); - let f32 = Float32Array::from(vec![f32::MIN, 100.0, -1.0, 0.0, 1.0, -100.0, f32::MAX]); - let f64 = Float64Array::from(vec![f64::MIN, 100.0, -1.0, 0.0, 1.0, -100.0, f64::MAX]); - let f32_nan = - 
Float32Array::from(vec![f32::NAN, 100.0, -1.0, 0.0, 1.0, -100.0, f32::NAN]); - let f64_nan = - Float64Array::from(vec![f64::NAN, 100.0, -1.0, 0.0, 1.0, -100.0, f64::NAN]); - - RecordBatch::try_from_iter(vec![ - ("i8", Arc::new(i8) as _), - ("i16", Arc::new(i16) as _), - ("i32", Arc::new(i32) as _), - ("i64", Arc::new(i64) as _), - ("u8", Arc::new(u8) as _), - ("u16", Arc::new(u16) as _), - ("u32", Arc::new(u32) as _), - ("u64", Arc::new(u64) as _), - ("f32", Arc::new(f32) as _), - ("f64", Arc::new(f64) as _), - ("f32_nan", Arc::new(f32_nan) as _), - ("f64_nan", Arc::new(f64_nan) as _), - ]) - .unwrap() -} - fn make_utf8_batch(value: Vec>) -> RecordBatch { let utf8 = StringArray::from(value.clone()); let large_utf8 = LargeStringArray::from(value); @@ -886,61 +744,8 @@ fn make_utf8_batch(value: Vec>) -> RecordBatch { .unwrap() } -fn make_dict_batch() -> RecordBatch { - let values = [ - Some("abc"), - Some("def"), - None, - Some("def"), - Some("abc"), - Some("fffff"), - Some("aaa"), - ]; - let dict_i8_array = DictionaryArray::::from_iter(values.iter().cloned()); - let dict_i32_array = DictionaryArray::::from_iter(values.iter().cloned()); - - // Dictionary array of integers - let int64_values = Int64Array::from(vec![0, -100, 100]); - let keys = Int8Array::from_iter([ - Some(0), - Some(1), - None, - Some(0), - Some(0), - Some(2), - Some(0), - ]); - let dict_i8_int_array = - DictionaryArray::::try_new(keys, Arc::new(int64_values)).unwrap(); - - RecordBatch::try_from_iter(vec![ - ("string_dict_i8", Arc::new(dict_i8_array) as _), - ("string_dict_i32", Arc::new(dict_i32_array) as _), - ("int_dict_i8", Arc::new(dict_i8_int_array) as _), - ]) - .unwrap() -} - fn create_data_batch(scenario: Scenario) -> Vec { match scenario { - Scenario::Boolean => { - vec![ - make_boolean_batch(vec![ - Some(true), - Some(false), - Some(true), - Some(false), - None, - ]), - make_boolean_batch(vec![ - Some(false), - Some(false), - Some(false), - Some(false), - Some(false), - ]), - ] - } Scenario::Timestamps => { vec![ make_timestamp_batch(TimeDelta::try_seconds(0).unwrap()), @@ -979,45 +784,7 @@ fn create_data_batch(scenario: Scenario) -> Vec { Scenario::UInt32Range => { vec![make_uint32_range(0, 10), make_uint32_range(200000, 300000)] } - Scenario::NumericLimits => { - vec![make_numeric_limit_batch()] - } - Scenario::Float16 => { - vec![ - make_f16_batch( - vec![-5.0, -4.0, -3.0, -2.0, -1.0] - .into_iter() - .map(f16::from_f32) - .collect(), - ), - make_f16_batch( - vec![-4.0, -3.0, -2.0, -1.0, 0.0] - .into_iter() - .map(f16::from_f32) - .collect(), - ), - make_f16_batch( - vec![0.0, 1.0, 2.0, 3.0, 4.0] - .into_iter() - .map(f16::from_f32) - .collect(), - ), - make_f16_batch( - vec![5.0, 6.0, 7.0, 8.0, 9.0] - .into_iter() - .map(f16::from_f32) - .collect(), - ), - ] - } - Scenario::Float32 => { - vec![ - make_f32_batch(vec![-5.0, -4.0, -3.0, -2.0, -1.0]), - make_f32_batch(vec![-4.0, -3.0, -2.0, -1.0, 0.0]), - make_f32_batch(vec![0.0, 1.0, 2.0, 3.0, 4.0]), - make_f32_batch(vec![5.0, 6.0, 7.0, 8.0, 9.0]), - ] - } + Scenario::Float64 => { vec![ make_f64_batch(vec![-5.0, -4.0, -3.0, -2.0, -1.0]), @@ -1034,44 +801,7 @@ fn create_data_batch(scenario: Scenario) -> Vec { make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), ] } - Scenario::Decimal256 => { - // decimal256 record batch - vec![ - make_decimal256_batch( - vec![ - i256::from(100), - i256::from(200), - i256::from(300), - i256::from(400), - i256::from(600), - ], - 9, - 2, - ), - make_decimal256_batch( - vec![ - i256::from(-500), - i256::from(100), - 
i256::from(300), - i256::from(400), - i256::from(600), - ], - 9, - 2, - ), - make_decimal256_batch( - vec![ - i256::from(2000), - i256::from(3000), - i256::from(3000), - i256::from(4000), - i256::from(6000), - ], - 9, - 2, - ), - ] - } + Scenario::DecimalBloomFilterInt32 => { // decimal record batch vec![ @@ -1187,9 +917,7 @@ fn create_data_batch(scenario: Scenario) -> Vec { ), ] } - Scenario::Dictionary => { - vec![make_dict_batch()] - } + Scenario::PeriodsInColumnNames => { vec![ // all frontend @@ -1224,120 +952,7 @@ fn create_data_batch(scenario: Scenario) -> Vec { make_int_batches_with_null(5, 1, 6), ] } - Scenario::StructArray => { - let struct_array_data = struct_array(vec![ - (Some(1), Some(6.0), Some(12.0)), - (Some(2), Some(8.5), None), - (None, Some(8.5), Some(14.0)), - ]); - - let schema = Arc::new(Schema::new(vec![Field::new( - "struct", - struct_array_data.data_type().clone(), - true, - )])); - vec![RecordBatch::try_new(schema, vec![struct_array_data]).unwrap()] - } - Scenario::Time32Second => { - vec![ - make_time32_batches( - Scenario::Time32Second, - vec![18506, 18507, 18508, 18509], - ), - make_time32_batches( - Scenario::Time32Second, - vec![18510, 18511, 18512, 18513], - ), - make_time32_batches( - Scenario::Time32Second, - vec![18514, 18515, 18516, 18517], - ), - make_time32_batches( - Scenario::Time32Second, - vec![18518, 18519, 18520, 18521], - ), - ] - } - Scenario::Time32Millisecond => { - vec![ - make_time32_batches( - Scenario::Time32Millisecond, - vec![3600000, 3600001, 3600002, 3600003], - ), - make_time32_batches( - Scenario::Time32Millisecond, - vec![3600004, 3600005, 3600006, 3600007], - ), - make_time32_batches( - Scenario::Time32Millisecond, - vec![3600008, 3600009, 3600010, 3600011], - ), - make_time32_batches( - Scenario::Time32Millisecond, - vec![3600012, 3600013, 3600014, 3600015], - ), - ] - } - Scenario::Time64Microsecond => { - vec![ - make_time64_batches( - Scenario::Time64Microsecond, - vec![1234567890123, 1234567890124, 1234567890125, 1234567890126], - ), - make_time64_batches( - Scenario::Time64Microsecond, - vec![1234567890127, 1234567890128, 1234567890129, 1234567890130], - ), - make_time64_batches( - Scenario::Time64Microsecond, - vec![1234567890131, 1234567890132, 1234567890133, 1234567890134], - ), - make_time64_batches( - Scenario::Time64Microsecond, - vec![1234567890135, 1234567890136, 1234567890137, 1234567890138], - ), - ] - } - Scenario::Time64Nanosecond => { - vec![ - make_time64_batches( - Scenario::Time64Nanosecond, - vec![ - 987654321012345, - 987654321012346, - 987654321012347, - 987654321012348, - ], - ), - make_time64_batches( - Scenario::Time64Nanosecond, - vec![ - 987654321012349, - 987654321012350, - 987654321012351, - 987654321012352, - ], - ), - make_time64_batches( - Scenario::Time64Nanosecond, - vec![ - 987654321012353, - 987654321012354, - 987654321012355, - 987654321012356, - ], - ), - make_time64_batches( - Scenario::Time64Nanosecond, - vec![ - 987654321012357, - 987654321012358, - 987654321012359, - 987654321012360, - ], - ), - ] - } + Scenario::UTF8 => { vec![ make_utf8_batch(vec![Some("a"), Some("b"), Some("c"), Some("d"), None]), @@ -1405,27 +1020,3 @@ async fn make_test_file_page(scenario: Scenario, row_per_page: usize) -> NamedTe writer.close().unwrap(); output_file } - -// returns a struct array with columns "int32_col", "float32_col" and "float64_col" with the specified values -fn struct_array(input: Vec<(Option, Option, Option)>) -> ArrayRef { - let int_32: Int32Array = input.iter().map(|(i, _, _)| 
i).collect(); - let float_32: Float32Array = input.iter().map(|(_, f, _)| f).collect(); - let float_64: Float64Array = input.iter().map(|(_, _, f)| f).collect(); - - let nullable = true; - let struct_array = StructArray::from(vec![ - ( - Arc::new(Field::new("int32_col", DataType::Int32, nullable)), - Arc::new(int_32) as ArrayRef, - ), - ( - Arc::new(Field::new("float32_col", DataType::Float32, nullable)), - Arc::new(float_32) as ArrayRef, - ), - ( - Arc::new(Field::new("float64_col", DataType::Float64, nullable)), - Arc::new(float_64) as ArrayRef, - ), - ]); - Arc::new(struct_array) -} From 0b8da6d6e75e2384a40c65f089ec17219f36b2ff Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 07:29:12 -0400 Subject: [PATCH 186/357] Rename RepartitionExec metric `repart_time` to `repartition_time` (#11703) * Rename RepartitionExec metric `repart_time` to `repartition_time` * Update datafusion/physical-plan/src/repartition/mod.rs Co-authored-by: Oleks V * fmt --------- Co-authored-by: Oleks V --- datafusion/physical-plan/src/repartition/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index f09324c4019cf..656d82215bbe3 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -414,8 +414,8 @@ pub struct RepartitionExec { struct RepartitionMetrics { /// Time in nanos to execute child operator and fetch batches fetch_time: metrics::Time, - /// Time in nanos to perform repartitioning - repart_time: metrics::Time, + /// Repartitioning elapsed time in nanos + repartition_time: metrics::Time, /// Time in nanos for sending resulting batches to channels. /// /// One metric per output partition. 
@@ -433,8 +433,8 @@ impl RepartitionMetrics { MetricBuilder::new(metrics).subset_time("fetch_time", input_partition); // Time in nanos to perform repartitioning - let repart_time = - MetricBuilder::new(metrics).subset_time("repart_time", input_partition); + let repartition_time = + MetricBuilder::new(metrics).subset_time("repartition_time", input_partition); // Time in nanos for sending resulting batches to channels let send_time = (0..num_output_partitions) @@ -449,7 +449,7 @@ impl RepartitionMetrics { Self { fetch_time, - repart_time, + repartition_time, send_time, } } @@ -775,7 +775,7 @@ impl RepartitionExec { context: Arc, ) -> Result<()> { let mut partitioner = - BatchPartitioner::try_new(partitioning, metrics.repart_time.clone())?; + BatchPartitioner::try_new(partitioning, metrics.repartition_time.clone())?; // execute the child operator let timer = metrics.fetch_time.timer(); From 1ce546168de23137dbe30b70b7f948d131681a00 Mon Sep 17 00:00:00 2001 From: Yasser Latreche Date: Thu, 1 Aug 2024 13:31:34 +0200 Subject: [PATCH 187/357] Fix `plan_to_sql`: Add wildcard projection to SELECT statement if no projection was set (#11744) * Fix: Add wildcard projection to SELECT statement if no projection is set * run cargo fmt --- datafusion/sql/src/unparser/plan.rs | 8 ++++++++ datafusion/sql/tests/cases/plan_to_sql.rs | 24 +++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index b30e109881c2e..e08f25d3c27ce 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -142,6 +142,14 @@ impl Unparser<'_> { return Ok(*body); } + // If no projection is set, add a wildcard projection to the select + // which will be translated to `SELECT *` in the SQL statement + if !select_builder.already_projected() { + select_builder.projection(vec![ast::SelectItem::Wildcard( + ast::WildcardAdditionalOptions::default(), + )]); + } + let mut twj = select_builder.pop_from().unwrap(); twj.relation(relation_builder); select_builder.push_from(twj); diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index a52333e54fac6..d1ac7a0c96d1e 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -449,6 +449,30 @@ fn test_table_references_in_plan_to_sql() { ); } +#[test] +fn test_table_scan_with_no_projection_in_plan_to_sql() { + fn test(table_name: &str, expected_sql: &str) { + let schema = Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("value", DataType::Utf8, false), + ]); + + let plan = table_scan(Some(table_name), &schema, None) + .unwrap() + .build() + .unwrap(); + let sql = plan_to_sql(&plan).unwrap(); + assert_eq!(format!("{}", sql), expected_sql) + } + + test( + "catalog.schema.table", + "SELECT * FROM catalog.\"schema\".\"table\"", + ); + test("schema.table", "SELECT * FROM \"schema\".\"table\""); + test("table", "SELECT * FROM \"table\""); +} + #[test] fn test_pretty_roundtrip() -> Result<()> { let schema = Schema::new(vec![ From cf98d94c9c055a4714c7ac3b06203754fcb653c6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 07:56:55 -0400 Subject: [PATCH 188/357] Use upstream `DataType::from_str` in arrow-cast (#11254) * Use upstream DataType::from_str in arrow-cast * Apply suggestions from code review Co-authored-by: Oleks V * fix error handlign --------- Co-authored-by: Oleks V --- datafusion/functions/src/core/arrow_cast.rs | 777 +------------------- 1 file 
changed, 10 insertions(+), 767 deletions(-) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index 9227f9e3a2a8c..c4db3e77049df 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -18,12 +18,11 @@ //! [`ArrowCastFunc`]: Implementation of the `arrow_cast` use std::any::Any; -use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc}; -use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; +use arrow::datatypes::DataType; use datafusion_common::{ - internal_err, plan_datafusion_err, plan_err, DataFusionError, ExprSchema, Result, - ScalarValue, + arrow_datafusion_err, internal_err, plan_datafusion_err, plan_err, DataFusionError, + ExprSchema, Result, ScalarValue, }; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; @@ -44,7 +43,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility} /// select cast(column_x as int) ... /// ``` /// -/// You can use the `arrow_cast` functiont to cast to a specific arrow type +/// Use the `arrow_cast` function to cast to a specific arrow type /// /// For example /// ```sql @@ -139,767 +138,11 @@ fn data_type_from_args(args: &[Expr]) -> Result { &args[1] ); }; - parse_data_type(val) -} - -/// Parses `str` into a `DataType`. -/// -/// `parse_data_type` is the reverse of [`DataType`]'s `Display` -/// impl, and maintains the invariant that -/// `parse_data_type(data_type.to_string()) == data_type` -/// -/// Remove if added to arrow: -fn parse_data_type(val: &str) -> Result { - Parser::new(val).parse() -} - -fn make_error(val: &str, msg: &str) -> DataFusionError { - plan_datafusion_err!("Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error {msg}" ) -} - -fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> DataFusionError { - make_error(val, &format!("Expected '{expected}', got '{actual}'")) -} - -#[derive(Debug)] -/// Implementation of `parse_data_type`, modeled after -struct Parser<'a> { - val: &'a str, - tokenizer: Tokenizer<'a>, -} - -impl<'a> Parser<'a> { - fn new(val: &'a str) -> Self { - Self { - val, - tokenizer: Tokenizer::new(val), - } - } - - fn parse(mut self) -> Result { - let data_type = self.parse_next_type()?; - // ensure that there is no trailing content - if self.tokenizer.next().is_some() { - Err(make_error( - self.val, - &format!("checking trailing content after parsing '{data_type}'"), - )) - } else { - Ok(data_type) - } - } - - /// parses the next full DataType - fn parse_next_type(&mut self) -> Result { - match self.next_token()? 
{ - Token::SimpleType(data_type) => Ok(data_type), - Token::Timestamp => self.parse_timestamp(), - Token::Time32 => self.parse_time32(), - Token::Time64 => self.parse_time64(), - Token::Duration => self.parse_duration(), - Token::Interval => self.parse_interval(), - Token::FixedSizeBinary => self.parse_fixed_size_binary(), - Token::Decimal128 => self.parse_decimal_128(), - Token::Decimal256 => self.parse_decimal_256(), - Token::Dictionary => self.parse_dictionary(), - Token::List => self.parse_list(), - Token::LargeList => self.parse_large_list(), - Token::FixedSizeList => self.parse_fixed_size_list(), - tok => Err(make_error( - self.val, - &format!("finding next type, got unexpected '{tok}'"), - )), - } - } - - /// Parses the List type - fn parse_list(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let data_type = self.parse_next_type()?; - self.expect_token(Token::RParen)?; - Ok(DataType::List(Arc::new(Field::new( - "item", data_type, true, - )))) - } - - /// Parses the LargeList type - fn parse_large_list(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let data_type = self.parse_next_type()?; - self.expect_token(Token::RParen)?; - Ok(DataType::LargeList(Arc::new(Field::new( - "item", data_type, true, - )))) - } - - /// Parses the FixedSizeList type - fn parse_fixed_size_list(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let length = self.parse_i32("FixedSizeList")?; - self.expect_token(Token::Comma)?; - let data_type = self.parse_next_type()?; - self.expect_token(Token::RParen)?; - Ok(DataType::FixedSizeList( - Arc::new(Field::new("item", data_type, true)), - length, - )) - } - - /// Parses the next timeunit - fn parse_time_unit(&mut self, context: &str) -> Result { - match self.next_token()? { - Token::TimeUnit(time_unit) => Ok(time_unit), - tok => Err(make_error( - self.val, - &format!("finding TimeUnit for {context}, got {tok}"), - )), - } - } - - /// Parses the next timezone - fn parse_timezone(&mut self, context: &str) -> Result> { - match self.next_token()? { - Token::None => Ok(None), - Token::Some => { - self.expect_token(Token::LParen)?; - let timezone = self.parse_double_quoted_string("Timezone")?; - self.expect_token(Token::RParen)?; - Ok(Some(timezone)) - } - tok => Err(make_error( - self.val, - &format!("finding Timezone for {context}, got {tok}"), - )), - } - } - - /// Parses the next double quoted string - fn parse_double_quoted_string(&mut self, context: &str) -> Result { - match self.next_token()? { - Token::DoubleQuotedString(s) => Ok(s), - tok => Err(make_error( - self.val, - &format!("finding double quoted string for {context}, got '{tok}'"), - )), - } - } - - /// Parses the next integer value - fn parse_i64(&mut self, context: &str) -> Result { - match self.next_token()? 
{ - Token::Integer(v) => Ok(v), - tok => Err(make_error( - self.val, - &format!("finding i64 for {context}, got '{tok}'"), - )), - } - } - - /// Parses the next i32 integer value - fn parse_i32(&mut self, context: &str) -> Result { - let length = self.parse_i64(context)?; - length.try_into().map_err(|e| { - make_error( - self.val, - &format!("converting {length} into i32 for {context}: {e}"), - ) - }) - } - - /// Parses the next i8 integer value - fn parse_i8(&mut self, context: &str) -> Result { - let length = self.parse_i64(context)?; - length.try_into().map_err(|e| { - make_error( - self.val, - &format!("converting {length} into i8 for {context}: {e}"), - ) - }) - } - - /// Parses the next u8 integer value - fn parse_u8(&mut self, context: &str) -> Result { - let length = self.parse_i64(context)?; - length.try_into().map_err(|e| { - make_error( - self.val, - &format!("converting {length} into u8 for {context}: {e}"), - ) - }) - } - - /// Parses the next timestamp (called after `Timestamp` has been consumed) - fn parse_timestamp(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let time_unit = self.parse_time_unit("Timestamp")?; - self.expect_token(Token::Comma)?; - let timezone = self.parse_timezone("Timestamp")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Timestamp(time_unit, timezone.map(Into::into))) - } - - /// Parses the next Time32 (called after `Time32` has been consumed) - fn parse_time32(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let time_unit = self.parse_time_unit("Time32")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Time32(time_unit)) - } - - /// Parses the next Time64 (called after `Time64` has been consumed) - fn parse_time64(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let time_unit = self.parse_time_unit("Time64")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Time64(time_unit)) - } - - /// Parses the next Duration (called after `Duration` has been consumed) - fn parse_duration(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let time_unit = self.parse_time_unit("Duration")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Duration(time_unit)) - } - - /// Parses the next Interval (called after `Interval` has been consumed) - fn parse_interval(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let interval_unit = match self.next_token()? 
{ - Token::IntervalUnit(interval_unit) => interval_unit, - tok => { - return Err(make_error( - self.val, - &format!("finding IntervalUnit for Interval, got {tok}"), - )) - } - }; - self.expect_token(Token::RParen)?; - Ok(DataType::Interval(interval_unit)) - } - - /// Parses the next FixedSizeBinary (called after `FixedSizeBinary` has been consumed) - fn parse_fixed_size_binary(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let length = self.parse_i32("FixedSizeBinary")?; - self.expect_token(Token::RParen)?; - Ok(DataType::FixedSizeBinary(length)) - } - - /// Parses the next Decimal128 (called after `Decimal128` has been consumed) - fn parse_decimal_128(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let precision = self.parse_u8("Decimal128")?; - self.expect_token(Token::Comma)?; - let scale = self.parse_i8("Decimal128")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Decimal128(precision, scale)) - } - - /// Parses the next Decimal256 (called after `Decimal256` has been consumed) - fn parse_decimal_256(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let precision = self.parse_u8("Decimal256")?; - self.expect_token(Token::Comma)?; - let scale = self.parse_i8("Decimal256")?; - self.expect_token(Token::RParen)?; - Ok(DataType::Decimal256(precision, scale)) - } - - /// Parses the next Dictionary (called after `Dictionary` has been consumed) - fn parse_dictionary(&mut self) -> Result { - self.expect_token(Token::LParen)?; - let key_type = self.parse_next_type()?; - self.expect_token(Token::Comma)?; - let value_type = self.parse_next_type()?; - self.expect_token(Token::RParen)?; - Ok(DataType::Dictionary( - Box::new(key_type), - Box::new(value_type), - )) - } - /// return the next token, or an error if there are none left - fn next_token(&mut self) -> Result { - match self.tokenizer.next() { - None => Err(make_error(self.val, "finding next token")), - Some(token) => token, - } - } - - /// consume the next token, returning OK(()) if it matches tok, and Err if not - fn expect_token(&mut self, tok: Token) -> Result<()> { - let next_token = self.next_token()?; - if next_token == tok { - Ok(()) - } else { - Err(make_error_expected(self.val, &tok, &next_token)) - } - } -} - -/// returns true if this character is a separator -fn is_separator(c: char) -> bool { - c == '(' || c == ')' || c == ',' || c == ' ' -} - -#[derive(Debug)] -/// Splits a strings like Dictionary(Int32, Int64) into tokens suitable for parsing -/// -/// For example the string "Timestamp(Nanosecond, None)" would be parsed into: -/// -/// * Token::Timestamp -/// * Token::Lparen -/// * Token::IntervalUnit(IntervalUnit::Nanosecond) -/// * Token::Comma, -/// * Token::None, -/// * Token::Rparen, -struct Tokenizer<'a> { - val: &'a str, - chars: Peekable>, - // temporary buffer for parsing words - word: String, -} - -impl<'a> Tokenizer<'a> { - fn new(val: &'a str) -> Self { - Self { - val, - chars: val.chars().peekable(), - word: String::new(), - } - } - - /// returns the next char, without consuming it - fn peek_next_char(&mut self) -> Option { - self.chars.peek().copied() - } - - /// returns the next char, and consuming it - fn next_char(&mut self) -> Option { - self.chars.next() - } - - /// parse the characters in val starting at pos, until the next - /// `,`, `(`, or `)` or end of line - fn parse_word(&mut self) -> Result { - // reset temp space - self.word.clear(); - loop { - match self.peek_next_char() { - None => break, - Some(c) if is_separator(c) => break, - Some(c) => { - 
self.next_char(); - self.word.push(c); - } - } - } - - if let Some(c) = self.word.chars().next() { - // if it started with a number, try parsing it as an integer - if c == '-' || c.is_numeric() { - let val: i64 = self.word.parse().map_err(|e| { - make_error( - self.val, - &format!("parsing {} as integer: {e}", self.word), - ) - })?; - return Ok(Token::Integer(val)); - } - // if it started with a double quote `"`, try parsing it as a double quoted string - else if c == '"' { - let len = self.word.chars().count(); - - // to verify it's double quoted - if let Some(last_c) = self.word.chars().last() { - if last_c != '"' || len < 2 { - return Err(make_error( - self.val, - &format!("parsing {} as double quoted string: last char must be \"", self.word), - )); - } - } - - if len == 2 { - return Err(make_error( - self.val, - &format!("parsing {} as double quoted string: empty string isn't supported", self.word), - )); - } - - let val: String = self.word.parse().map_err(|e| { - make_error( - self.val, - &format!("parsing {} as double quoted string: {e}", self.word), - ) - })?; - - let s = val[1..len - 1].to_string(); - if s.contains('"') { - return Err(make_error( - self.val, - &format!("parsing {} as double quoted string: escaped double quote isn't supported", self.word), - )); - } - - return Ok(Token::DoubleQuotedString(s)); - } - } - - // figure out what the word was - let token = match self.word.as_str() { - "Null" => Token::SimpleType(DataType::Null), - "Boolean" => Token::SimpleType(DataType::Boolean), - - "Int8" => Token::SimpleType(DataType::Int8), - "Int16" => Token::SimpleType(DataType::Int16), - "Int32" => Token::SimpleType(DataType::Int32), - "Int64" => Token::SimpleType(DataType::Int64), - - "UInt8" => Token::SimpleType(DataType::UInt8), - "UInt16" => Token::SimpleType(DataType::UInt16), - "UInt32" => Token::SimpleType(DataType::UInt32), - "UInt64" => Token::SimpleType(DataType::UInt64), - - "Utf8" => Token::SimpleType(DataType::Utf8), - "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8), - "Utf8View" => Token::SimpleType(DataType::Utf8View), - "Binary" => Token::SimpleType(DataType::Binary), - "BinaryView" => Token::SimpleType(DataType::BinaryView), - "LargeBinary" => Token::SimpleType(DataType::LargeBinary), - - "Float16" => Token::SimpleType(DataType::Float16), - "Float32" => Token::SimpleType(DataType::Float32), - "Float64" => Token::SimpleType(DataType::Float64), - - "Date32" => Token::SimpleType(DataType::Date32), - "Date64" => Token::SimpleType(DataType::Date64), - - "List" => Token::List, - "LargeList" => Token::LargeList, - "FixedSizeList" => Token::FixedSizeList, - - "Second" => Token::TimeUnit(TimeUnit::Second), - "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond), - "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond), - "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond), - - "Timestamp" => Token::Timestamp, - "Time32" => Token::Time32, - "Time64" => Token::Time64, - "Duration" => Token::Duration, - "Interval" => Token::Interval, - "Dictionary" => Token::Dictionary, - - "FixedSizeBinary" => Token::FixedSizeBinary, - "Decimal128" => Token::Decimal128, - "Decimal256" => Token::Decimal256, - - "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth), - "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime), - "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano), - - "Some" => Token::Some, - "None" => Token::None, - - _ => { - return Err(make_error( - self.val, - &format!("unrecognized word: {}", self.word), - )) - } - }; - Ok(token) - } -} - 
-impl<'a> Iterator for Tokenizer<'a> { - type Item = Result; - - fn next(&mut self) -> Option { - loop { - match self.peek_next_char()? { - ' ' => { - // skip whitespace - self.next_char(); - continue; - } - '(' => { - self.next_char(); - return Some(Ok(Token::LParen)); - } - ')' => { - self.next_char(); - return Some(Ok(Token::RParen)); - } - ',' => { - self.next_char(); - return Some(Ok(Token::Comma)); - } - _ => return Some(self.parse_word()), - } - } - } -} - -/// Grammar is -/// -#[derive(Debug, PartialEq)] -enum Token { - // Null, or Int32 - SimpleType(DataType), - Timestamp, - Time32, - Time64, - Duration, - Interval, - FixedSizeBinary, - Decimal128, - Decimal256, - Dictionary, - TimeUnit(TimeUnit), - IntervalUnit(IntervalUnit), - LParen, - RParen, - Comma, - Some, - None, - Integer(i64), - DoubleQuotedString(String), - List, - LargeList, - FixedSizeList, -} - -impl Display for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Token::SimpleType(t) => write!(f, "{t}"), - Token::List => write!(f, "List"), - Token::LargeList => write!(f, "LargeList"), - Token::FixedSizeList => write!(f, "FixedSizeList"), - Token::Timestamp => write!(f, "Timestamp"), - Token::Time32 => write!(f, "Time32"), - Token::Time64 => write!(f, "Time64"), - Token::Duration => write!(f, "Duration"), - Token::Interval => write!(f, "Interval"), - Token::TimeUnit(u) => write!(f, "TimeUnit({u:?})"), - Token::IntervalUnit(u) => write!(f, "IntervalUnit({u:?})"), - Token::LParen => write!(f, "("), - Token::RParen => write!(f, ")"), - Token::Comma => write!(f, ","), - Token::Some => write!(f, "Some"), - Token::None => write!(f, "None"), - Token::FixedSizeBinary => write!(f, "FixedSizeBinary"), - Token::Decimal128 => write!(f, "Decimal128"), - Token::Decimal256 => write!(f, "Decimal256"), - Token::Dictionary => write!(f, "Dictionary"), - Token::Integer(v) => write!(f, "Integer({v})"), - Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_parse_data_type() { - // this ensures types can be parsed correctly from their string representations - for dt in list_datatypes() { - round_trip(dt) - } - } - - /// convert data_type to a string, and then parse it as a type - /// verifying it is the same - fn round_trip(data_type: DataType) { - let data_type_string = data_type.to_string(); - println!("Input '{data_type_string}' ({data_type:?})"); - let parsed_type = parse_data_type(&data_type_string).unwrap(); - assert_eq!( - data_type, parsed_type, - "Mismatch parsing {data_type_string}" - ); - } - - fn list_datatypes() -> Vec { - vec![ - // --------- - // Non Nested types - // --------- - DataType::Null, - DataType::Boolean, - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, - DataType::UInt8, - DataType::UInt16, - DataType::UInt32, - DataType::UInt64, - DataType::Float16, - DataType::Float32, - DataType::Float64, - DataType::Timestamp(TimeUnit::Second, None), - DataType::Timestamp(TimeUnit::Millisecond, None), - DataType::Timestamp(TimeUnit::Microsecond, None), - DataType::Timestamp(TimeUnit::Nanosecond, None), - // we can't cover all possible timezones, here we only test utc and +08:00 - DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())), - DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), - DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), - DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())), - 
DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())), - DataType::Timestamp(TimeUnit::Microsecond, Some("+08:00".into())), - DataType::Timestamp(TimeUnit::Millisecond, Some("+08:00".into())), - DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())), - DataType::Date32, - DataType::Date64, - DataType::Time32(TimeUnit::Second), - DataType::Time32(TimeUnit::Millisecond), - DataType::Time32(TimeUnit::Microsecond), - DataType::Time32(TimeUnit::Nanosecond), - DataType::Time64(TimeUnit::Second), - DataType::Time64(TimeUnit::Millisecond), - DataType::Time64(TimeUnit::Microsecond), - DataType::Time64(TimeUnit::Nanosecond), - DataType::Duration(TimeUnit::Second), - DataType::Duration(TimeUnit::Millisecond), - DataType::Duration(TimeUnit::Microsecond), - DataType::Duration(TimeUnit::Nanosecond), - DataType::Interval(IntervalUnit::YearMonth), - DataType::Interval(IntervalUnit::DayTime), - DataType::Interval(IntervalUnit::MonthDayNano), - DataType::Binary, - DataType::BinaryView, - DataType::FixedSizeBinary(0), - DataType::FixedSizeBinary(1234), - DataType::FixedSizeBinary(-432), - DataType::LargeBinary, - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Decimal128(7, 12), - DataType::Decimal256(6, 13), - // --------- - // Nested types - // --------- - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), - DataType::Dictionary( - Box::new(DataType::Int8), - Box::new(DataType::Timestamp(TimeUnit::Nanosecond, None)), - ), - DataType::Dictionary( - Box::new(DataType::Int8), - Box::new(DataType::FixedSizeBinary(23)), - ), - DataType::Dictionary( - Box::new(DataType::Int8), - Box::new( - // nested dictionaries are probably a bad idea but they are possible - DataType::Dictionary( - Box::new(DataType::Int8), - Box::new(DataType::Utf8), - ), - ), - ), - // TODO support more structured types (List, LargeList, Struct, Union, Map, RunEndEncoded, etc) - ] - } - - #[test] - fn test_parse_data_type_whitespace_tolerance() { - // (string to parse, expected DataType) - let cases = [ - ("Int8", DataType::Int8), - ( - "Timestamp (Nanosecond, None)", - DataType::Timestamp(TimeUnit::Nanosecond, None), - ), - ( - "Timestamp (Nanosecond, None) ", - DataType::Timestamp(TimeUnit::Nanosecond, None), - ), - ( - " Timestamp (Nanosecond, None )", - DataType::Timestamp(TimeUnit::Nanosecond, None), - ), - ( - "Timestamp (Nanosecond, None ) ", - DataType::Timestamp(TimeUnit::Nanosecond, None), - ), - ]; - - for (data_type_string, expected_data_type) in cases { - println!("Parsing '{data_type_string}', expecting '{expected_data_type:?}'"); - let parsed_data_type = parse_data_type(data_type_string).unwrap(); - assert_eq!(parsed_data_type, expected_data_type); - } - } - - #[test] - fn parse_data_type_errors() { - // (string to parse, expected error message) - let cases = [ - ("", "Unsupported type ''"), - ("", "Error finding next token"), - ("null", "Unsupported type 'null'"), - ("Nu", "Unsupported type 'Nu'"), - ( - r#"Timestamp(Nanosecond, Some(+00:00))"#, - "Error unrecognized word: +00:00", - ), - ( - r#"Timestamp(Nanosecond, Some("+00:00))"#, - r#"parsing "+00:00 as double quoted string: last char must be ""#, - ), - ( - r#"Timestamp(Nanosecond, Some(""))"#, - r#"parsing "" as double quoted string: empty string isn't supported"#, - ), - ( - r#"Timestamp(Nanosecond, Some("+00:00""))"#, - r#"parsing "+00:00"" as double quoted string: escaped double quote isn't supported"#, - ), - 
("Timestamp(Nanosecond, ", "Error finding next token"), - ( - "Float32 Float32", - "trailing content after parsing 'Float32'", - ), - ("Int32, ", "trailing content after parsing 'Int32'"), - ("Int32(3), ", "trailing content after parsing 'Int32'"), - ("FixedSizeBinary(Int32), ", "Error finding i64 for FixedSizeBinary, got 'Int32'"), - ("FixedSizeBinary(3.0), ", "Error parsing 3.0 as integer: invalid digit found in string"), - // too large for i32 - ("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"), - // can't have negative precision - ("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"), - ("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"), - ("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"), - ("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"), - - ]; - - for (data_type_string, expected_message) in cases { - print!("Parsing '{data_type_string}', expecting '{expected_message}'"); - match parse_data_type(data_type_string) { - Ok(d) => panic!( - "Expected error while parsing '{data_type_string}', but got '{d}'" - ), - Err(e) => { - let message = e.to_string(); - assert!( - message.contains(expected_message), - "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual:{message}\n" - ); - // errors should also contain a help message - assert!(message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'")); - } - } - } - } + val.parse().map_err(|e| match e { + // If the data type cannot be parsed, return a Plan error to signal an + // error in the input rather than a more general ArrowError + arrow::error::ArrowError::ParseError(e) => plan_datafusion_err!("{e}"), + e => arrow_datafusion_err!(e), + }) } From a4ac0829ecf63b3640315835b1374211dfadd953 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 07:57:47 -0400 Subject: [PATCH 189/357] Fix documentation warnings, make CsvExecBuilder and Unparsed pub (#11729) * Fix documentation warnings, make CsvExecBuilder and Unparsed pub * Update datafusion/functions-aggregate/src/string_agg.rs Co-authored-by: Jonah Gao --------- Co-authored-by: Jonah Gao --- .github/workflows/rust.yml | 2 +- datafusion/common/src/error.rs | 3 ++- datafusion/core/src/datasource/physical_plan/mod.rs | 2 +- datafusion/core/src/datasource/physical_plan/parquet/mod.rs | 4 ++-- datafusion/functions-aggregate/src/string_agg.rs | 2 +- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 2 +- datafusion/physical-plan/src/joins/utils.rs | 2 +- datafusion/sql/src/unparser/expr.rs | 5 +++-- datafusion/sql/src/unparser/mod.rs | 2 ++ 9 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 45abeb8f6fe26..fd8c2d2090b92 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -234,7 +234,7 @@ jobs: rust-version: stable - name: Run cargo doc run: | - export RUSTDOCFLAGS="-D warnings -A rustdoc::private-intra-doc-links" + export RUSTDOCFLAGS="-D warnings" cargo doc --document-private-items --no-deps --workspace cd datafusion-cli cargo doc --document-private-items --no-deps diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 
58ff1121e36d0..f62acaf0493b8 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -321,7 +321,8 @@ impl From for io::Error { } impl DataFusionError { - const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: "; + /// The separator between the error message and the backtrace + pub const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: "; /// Get deepest underlying [`DataFusionError`] /// diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index a897895246e3a..f810fb86bd896 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -35,7 +35,7 @@ pub use self::parquet::{ParquetExec, ParquetFileMetrics, ParquetFileReaderFactor pub use arrow_file::ArrowExec; pub use avro::AvroExec; -pub use csv::{CsvConfig, CsvExec, CsvOpener}; +pub use csv::{CsvConfig, CsvExec, CsvExecBuilder, CsvOpener}; pub use file_groups::FileGroupPartitioner; pub use file_scan_config::{ wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 91e2f8866bffb..72aabefba5952 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -189,9 +189,9 @@ pub use writer::plan_to_parquet; /// # Execution Overview /// /// * Step 1: [`ParquetExec::execute`] is called, returning a [`FileStream`] -/// configured to open parquet files with a [`ParquetOpener`]. +/// configured to open parquet files with a `ParquetOpener`. /// -/// * Step 2: When the stream is polled, the [`ParquetOpener`] is called to open +/// * Step 2: When the stream is polled, the `ParquetOpener` is called to open /// the file. /// /// * Step 3: The `ParquetOpener` gets the [`ParquetMetaData`] (file metadata) diff --git a/datafusion/functions-aggregate/src/string_agg.rs b/datafusion/functions-aggregate/src/string_agg.rs index 371cc8fb97394..5d91a52bc4c65 100644 --- a/datafusion/functions-aggregate/src/string_agg.rs +++ b/datafusion/functions-aggregate/src/string_agg.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`StringAgg`] and [`StringAggAccumulator`] accumulator for the `string_agg` function +//! [`StringAgg`] accumulator for the `string_agg` function use arrow::array::ArrayRef; use arrow_schema::DataType; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 38dfbb3ed5514..1e1418744fb8a 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -289,7 +289,7 @@ impl ExprSimplifier { self } - /// Should [`Canonicalizer`] be applied before simplification? + /// Should `Canonicalizer` be applied before simplification? /// /// If true (the default), the expression will be rewritten to canonical /// form before simplification. This is useful to ensure that the simplifier diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 51744730a5a16..b8a58e4d0d302 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -66,7 +66,7 @@ use parking_lot::Mutex; /// E.g. 
1 -> [3, 6, 8] indicates that the column values map to rows 3, 6 and 8 for hash value 1 /// As the key is a hash value, we need to check possible hash collisions in the probe stage /// During this stage it might be the case that a row is contained the same hashmap value, -/// but the values don't match. Those are checked in the [`equal_rows_arr`](crate::joins::hash_join::equal_rows_arr) method. +/// but the values don't match. Those are checked in the `equal_rows_arr` method. /// /// The indices (values) are stored in a separate chained list stored in the `Vec`. /// diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index e144dfd649d20..9b44848a91a88 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -522,8 +522,9 @@ impl Unparser<'_> { } } - /// This function can convert more [`Expr`] types than `expr_to_sql`, returning an [`Unparsed`] - /// like `Sort` expressions to `OrderByExpr` expressions. + /// This function can convert more [`Expr`] types than `expr_to_sql`, + /// returning an [`Unparsed`] like `Sort` expressions to `OrderByExpr` + /// expressions. pub fn expr_to_unparsed(&self, expr: &Expr) -> Result { match expr { Expr::Sort(Sort { diff --git a/datafusion/sql/src/unparser/mod.rs b/datafusion/sql/src/unparser/mod.rs index 83ae64ba238b0..b2fd32566aa84 100644 --- a/datafusion/sql/src/unparser/mod.rs +++ b/datafusion/sql/src/unparser/mod.rs @@ -29,6 +29,8 @@ pub use plan::plan_to_sql; use self::dialect::{DefaultDialect, Dialect}; pub mod dialect; +pub use expr::Unparsed; + /// Convert a DataFusion [`Expr`] to [`sqlparser::ast::Expr`] /// /// See [`expr_to_sql`] for background. `Unparser` allows greater control of From 3fe18604e8a127dbecf78acff0dce7596790bfe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Thu, 1 Aug 2024 13:58:33 +0200 Subject: [PATCH 190/357] Add null test (#11760) --- datafusion/sqllogictest/test_files/aggregate.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 6ec1e0c52690c..ee72289d66eb1 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1243,6 +1243,12 @@ SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL) ---- 2 +# percentile_cont_with_nulls_only +query I +SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (CAST(NULL as INT))) as t (v); +---- +NULL + # csv_query_cube_avg query TIR SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2 From 921c3b6b181b6175041c6927d38c7ce2c0735121 Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 1 Aug 2024 08:15:58 -0700 Subject: [PATCH 191/357] Add `TrackedMemoryPool` with better error messages on exhaustion (#11665) * feat(11523): TrackConsumersPool impl which includes errors messages with top K of consumers * test(11523): unit tests for TrackConsumersPool * test(11523): integration test for tracked consumers oom message * chore(11523): use nonzero usize * chore(11523): document the what the memory insufficient_capacity_err is actually returning * chore(11523): improve test failure coverage for TrackConsumersPool * fix(11523): handle additive tracking of same hashed consumer, across different reservations * refactor(11523): update error message to delineate the multiple consumer with the same name, but different hash * test(11523): demonstrate the underlying pool behavior on deregister * 
chore: make explicit what the insufficient_capacity_err() logs * fix(11523): remove to_root() for the error, since the immediate inner child should be returning an OOM * chore(11523): add result to logging of failed CI tests * fix(11523): splice error message to get consumers prior to error message * Revert "fix(11523): splice error message to get consumers prior to error message" This reverts commit 09b20d289f53d3b61b976313f8731e8a6711f370. * fix(11523): fix without splicing error messages, and instead handle the proper error bubbling (msg wrapping) * chore: update docs to explain purpose of TrackConsumersPool Co-authored-by: Andrew Lamb * refactor(11523): enable TrackConsumersPool to be used in runtime metrics --------- Co-authored-by: Andrew Lamb --- datafusion/core/tests/memory_limit/mod.rs | 55 ++- datafusion/execution/src/memory_pool/mod.rs | 2 +- datafusion/execution/src/memory_pool/pool.rs | 377 ++++++++++++++++++- 3 files changed, 431 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index a2bdbe64aa430..5c712af801922 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -26,10 +26,14 @@ use datafusion::assert_batches_eq; use datafusion::physical_optimizer::PhysicalOptimizerRule; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::streaming::PartitionStream; +use datafusion_execution::memory_pool::{ + GreedyMemoryPool, MemoryPool, TrackConsumersPool, +}; use datafusion_expr::{Expr, TableType}; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; use futures::StreamExt; use std::any::Any; +use std::num::NonZeroUsize; use std::sync::{Arc, OnceLock}; use tokio::fs::File; @@ -371,6 +375,39 @@ async fn oom_parquet_sink() { .await } +#[tokio::test] +async fn oom_with_tracked_consumer_pool() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.into_path().join("test.parquet"); + let _ = File::create(path.clone()).await.unwrap(); + + TestCase::new() + .with_config( + SessionConfig::new() + ) + .with_query(format!( + " + COPY (select * from t) + TO '{}' + STORED AS PARQUET OPTIONS (compression 'uncompressed'); + ", + path.to_string_lossy() + )) + .with_expected_errors(vec![ + "Failed to allocate additional", + "for ParquetSink(ArrowColumnWriter)", + "Resources exhausted with top memory consumers (across reservations) are: ParquetSink(ArrowColumnWriter)" + ]) + .with_memory_pool(Arc::new( + TrackConsumersPool::new( + GreedyMemoryPool::new(200_000), + NonZeroUsize::new(1).unwrap() + ) + )) + .run() + .await +} + /// Run the query with the specified memory limit, /// and verifies the expected errors are returned #[derive(Clone, Debug)] @@ -378,6 +415,7 @@ struct TestCase { query: Option, expected_errors: Vec, memory_limit: usize, + memory_pool: Option>, config: SessionConfig, scenario: Scenario, /// How should the disk manager (that allows spilling) be @@ -396,6 +434,7 @@ impl TestCase { expected_errors: vec![], memory_limit: 0, config: SessionConfig::new(), + memory_pool: None, scenario: Scenario::AccessLog, disk_manager_config: DiskManagerConfig::Disabled, expected_plan: vec![], @@ -425,6 +464,15 @@ impl TestCase { self } + /// Set the memory pool to be used + /// + /// This will override the memory_limit requested, + /// as the memory pool includes the limit. 
+ fn with_memory_pool(mut self, memory_pool: Arc) -> Self { + self.memory_pool = Some(memory_pool); + self + } + /// Specify the configuration to use pub fn with_config(mut self, config: SessionConfig) -> Self { self.config = config; @@ -465,6 +513,7 @@ impl TestCase { query, expected_errors, memory_limit, + memory_pool, config, scenario, disk_manager_config, @@ -474,11 +523,15 @@ impl TestCase { let table = scenario.table(); - let rt_config = RuntimeConfig::new() + let mut rt_config = RuntimeConfig::new() // disk manager setting controls the spilling .with_disk_manager(disk_manager_config) .with_memory_limit(memory_limit, MEMORY_FRACTION); + if let Some(pool) = memory_pool { + rt_config = rt_config.with_memory_pool(pool); + }; + let runtime = RuntimeEnv::new(rt_config).unwrap(); // Configure execution diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index 3df212d466c9f..dcd59acbd49eb 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -117,7 +117,7 @@ pub trait MemoryPool: Send + Sync + std::fmt::Debug { /// For help with allocation accounting, see the [proxy] module. /// /// [proxy]: crate::memory_pool::proxy -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct MemoryConsumer { name: String, can_spill: bool, diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index fd7724f3076c4..9cb6f207e59cd 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -17,9 +17,13 @@ use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_common::{resources_datafusion_err, DataFusionError, Result}; +use hashbrown::HashMap; use log::debug; use parking_lot::Mutex; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::{ + num::NonZeroUsize, + sync::atomic::{AtomicU64, AtomicUsize, Ordering}, +}; /// A [`MemoryPool`] that enforces no limit #[derive(Debug, Default)] @@ -231,6 +235,11 @@ impl MemoryPool for FairSpillPool { } } +/// Constructs a resources error based upon the individual [`MemoryReservation`]. +/// +/// The error references the `bytes already allocated` for the reservation, +/// and not the total within the collective [`MemoryPool`], +/// nor the total across multiple reservations with the same [`MemoryConsumer`]. #[inline(always)] fn insufficient_capacity_err( reservation: &MemoryReservation, @@ -240,6 +249,152 @@ fn insufficient_capacity_err( resources_datafusion_err!("Failed to allocate additional {} bytes for {} with {} bytes already allocated - maximum available is {}", additional, reservation.registration.consumer.name, reservation.size, available) } +/// A [`MemoryPool`] that tracks the consumers that have +/// reserved memory within the inner memory pool. +/// +/// By tracking memory reservations more carefully this pool +/// can provide better error messages on the largest memory users +/// +/// Tracking is per hashed [`MemoryConsumer`], not per [`MemoryReservation`]. +/// The same consumer can have multiple reservations. +#[derive(Debug)] +pub struct TrackConsumersPool { + inner: I, + top: NonZeroUsize, + tracked_consumers: Mutex>, +} + +impl TrackConsumersPool { + /// Creates a new [`TrackConsumersPool`]. + /// + /// The `top` determines how many Top K [`MemoryConsumer`]s to include + /// in the reported [`DataFusionError::ResourcesExhausted`]. 
+ pub fn new(inner: I, top: NonZeroUsize) -> Self { + Self { + inner, + top, + tracked_consumers: Default::default(), + } + } + + /// Determine if there are multiple [`MemoryConsumer`]s registered + /// which have the same name. + /// + /// This is very tied to the implementation of the memory consumer. + fn has_multiple_consumers(&self, name: &String) -> bool { + let consumer = MemoryConsumer::new(name); + let consumer_with_spill = consumer.clone().with_can_spill(true); + let guard = self.tracked_consumers.lock(); + guard.contains_key(&consumer) && guard.contains_key(&consumer_with_spill) + } + + /// The top consumers in a report string. + pub fn report_top(&self, top: usize) -> String { + let mut consumers = self + .tracked_consumers + .lock() + .iter() + .map(|(consumer, reserved)| { + ( + (consumer.name().to_owned(), consumer.can_spill()), + reserved.load(Ordering::Acquire), + ) + }) + .collect::>(); + consumers.sort_by(|a, b| b.1.cmp(&a.1)); // inverse ordering + + consumers[0..std::cmp::min(top, consumers.len())] + .iter() + .map(|((name, can_spill), size)| { + if self.has_multiple_consumers(name) { + format!("{name}(can_spill={}) consumed {:?} bytes", can_spill, size) + } else { + format!("{name} consumed {:?} bytes", size) + } + }) + .collect::>() + .join(", ") + } +} + +impl MemoryPool for TrackConsumersPool { + fn register(&self, consumer: &MemoryConsumer) { + self.inner.register(consumer); + + let mut guard = self.tracked_consumers.lock(); + if let Some(already_reserved) = guard.insert(consumer.clone(), Default::default()) + { + guard.entry_ref(consumer).and_modify(|bytes| { + bytes.fetch_add( + already_reserved.load(Ordering::Acquire), + Ordering::AcqRel, + ); + }); + } + } + + fn unregister(&self, consumer: &MemoryConsumer) { + self.inner.unregister(consumer); + self.tracked_consumers.lock().remove(consumer); + } + + fn grow(&self, reservation: &MemoryReservation, additional: usize) { + self.inner.grow(reservation, additional); + self.tracked_consumers + .lock() + .entry_ref(reservation.consumer()) + .and_modify(|bytes| { + bytes.fetch_add(additional as u64, Ordering::AcqRel); + }); + } + + fn shrink(&self, reservation: &MemoryReservation, shrink: usize) { + self.inner.shrink(reservation, shrink); + self.tracked_consumers + .lock() + .entry_ref(reservation.consumer()) + .and_modify(|bytes| { + bytes.fetch_sub(shrink as u64, Ordering::AcqRel); + }); + } + + fn try_grow(&self, reservation: &MemoryReservation, additional: usize) -> Result<()> { + self.inner + .try_grow(reservation, additional) + .map_err(|e| match e { + DataFusionError::ResourcesExhausted(e) => { + // wrap OOM message in top consumers + DataFusionError::ResourcesExhausted( + provide_top_memory_consumers_to_error_msg( + e.to_owned(), + self.report_top(self.top.into()), + ), + ) + } + _ => e, + })?; + + self.tracked_consumers + .lock() + .entry_ref(reservation.consumer()) + .and_modify(|bytes| { + bytes.fetch_add(additional as u64, Ordering::AcqRel); + }); + Ok(()) + } + + fn reserved(&self) -> usize { + self.inner.reserved() + } +} + +fn provide_top_memory_consumers_to_error_msg( + error_msg: String, + top_consumers: String, +) -> String { + format!("Resources exhausted with top memory consumers (across reservations) are: {}. 
Error: {}", top_consumers, error_msg) +} + #[cfg(test)] mod tests { use super::*; @@ -311,4 +466,224 @@ mod tests { let err = r4.try_grow(30).unwrap_err().strip_backtrace(); assert_eq!(err, "Resources exhausted: Failed to allocate additional 30 bytes for s4 with 0 bytes already allocated - maximum available is 20"); } + + #[test] + fn test_tracked_consumers_pool() { + let pool: Arc = Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + + // Test: use all the different interfaces to change reservation size + + // set r1=50, using grow and shrink + let mut r1 = MemoryConsumer::new("r1").register(&pool); + r1.grow(70); + r1.shrink(20); + + // set r2=15 using try_grow + let mut r2 = MemoryConsumer::new("r2").register(&pool); + r2.try_grow(15) + .expect("should succeed in memory allotment for r2"); + + // set r3=20 using try_resize + let mut r3 = MemoryConsumer::new("r3").register(&pool); + r3.try_resize(25) + .expect("should succeed in memory allotment for r3"); + r3.try_resize(20) + .expect("should succeed in memory allotment for r3"); + + // set r4=10 + // this should not be reported in top 3 + let mut r4 = MemoryConsumer::new("r4").register(&pool); + r4.grow(10); + + // Test: reports if new reservation causes error + // using the previously set sizes for other consumers + let mut r5 = MemoryConsumer::new("r5").register(&pool); + let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 50 bytes, r3 consumed 20 bytes, r2 consumed 15 bytes. Error: Failed to allocate additional 150 bytes for r5 with 0 bytes already allocated - maximum available is 5"; + let res = r5.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide list of top memory consumers, instead found {:?}", + res + ); + } + + #[test] + fn test_tracked_consumers_pool_register() { + let pool: Arc = Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + + let same_name = "foo"; + + // Test: see error message when no consumers recorded yet + let mut r0 = MemoryConsumer::new(same_name).register(&pool); + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 100"; + let res = r0.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide proper error when no reservations have been made yet, instead found {:?}", res + ); + + // API: multiple registrations using the same hashed consumer, + // will be recognized as the same in the TrackConsumersPool. + + // Test: will be the same per Top Consumers reported. + r0.grow(10); // make r0=10, pool available=90 + let new_consumer_same_name = MemoryConsumer::new(same_name); + let mut r1 = new_consumer_same_name.clone().register(&pool); + // TODO: the insufficient_capacity_err() message is per reservation, not per consumer. + // a followup PR will clarify this message "0 bytes already allocated for this reservation" + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 10 bytes. 
Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 90"; + let res = r1.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide proper error with same hashed consumer (a single foo=10 bytes, available=90), instead found {:?}", res + ); + + // Test: will accumulate size changes per consumer, not per reservation + r1.grow(20); + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 30 bytes. Error: Failed to allocate additional 150 bytes for foo with 20 bytes already allocated - maximum available is 70"; + let res = r1.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide proper error with same hashed consumer (a single foo=30 bytes, available=70), instead found {:?}", res + ); + + // Test: different hashed consumer, (even with the same name), + // will be recognized as different in the TrackConsumersPool + let consumer_with_same_name_but_different_hash = + MemoryConsumer::new(same_name).with_can_spill(true); + let mut r2 = consumer_with_same_name_but_different_hash.register(&pool); + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo(can_spill=false) consumed 30 bytes, foo(can_spill=true) consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 70"; + let res = r2.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide proper error with different hashed consumer (foo(can_spill=false)=30 bytes and foo(can_spill=true)=0 bytes, available=70), instead found {:?}", res + ); + } + + #[test] + fn test_tracked_consumers_pool_deregister() { + fn test_per_pool_type(pool: Arc) { + // Baseline: see the 2 memory consumers + let mut r0 = MemoryConsumer::new("r0").register(&pool); + r0.grow(10); + let r1_consumer = MemoryConsumer::new("r1"); + let mut r1 = r1_consumer.clone().register(&pool); + r1.grow(20); + let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 20 bytes, r0 consumed 10 bytes. Error: Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 70"; + let res = r0.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected) + ), + "should provide proper error with both consumers, instead found {:?}", + res + ); + + // Test: unregister one + // only the remaining one should be listed + pool.unregister(&r1_consumer); + let expected_consumers = "Resources exhausted with top memory consumers (across reservations) are: r0 consumed 10 bytes"; + let res = r0.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected_consumers) + ), + "should provide proper error with only 1 consumer left registered, instead found {:?}", res + ); + + // Test: actual message we see is the `available is 70`. When it should be `available is 90`. + // This is because the pool.shrink() does not automatically occur within the inner_pool.deregister(). 
+ let expected_70_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 70"; + let res = r0.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected_70_available) + ), + "should find that the inner pool will still count all bytes for the deregistered consumer until the reservation is dropped, instead found {:?}", res + ); + + // Test: the registration needs to free itself (or be dropped), + // for the proper error message + r1.free(); + let expected_90_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 90"; + let res = r0.try_grow(150); + assert!( + matches!( + &res, + Err(DataFusionError::ResourcesExhausted(ref e)) if e.to_string().contains(expected_90_available) + ), + "should correctly account the total bytes after reservation is free, instead found {:?}", res + ); + } + + let tracked_spill_pool: Arc = Arc::new(TrackConsumersPool::new( + FairSpillPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + test_per_pool_type(tracked_spill_pool); + + let tracked_greedy_pool: Arc = Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + test_per_pool_type(tracked_greedy_pool); + } + + #[test] + fn test_tracked_consumers_pool_use_beyond_errors() { + let upcasted: Arc = + Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + let pool: Arc = Arc::clone(&upcasted) + .downcast::>() + .unwrap(); + // set r1=20 + let mut r1 = MemoryConsumer::new("r1").register(&pool); + r1.grow(20); + // set r2=15 + let mut r2 = MemoryConsumer::new("r2").register(&pool); + r2.grow(15); + // set r3=45 + let mut r3 = MemoryConsumer::new("r3").register(&pool); + r3.grow(45); + + let downcasted = upcasted + .downcast::>() + .unwrap(); + + // Test: can get runtime metrics, even without an error thrown + let expected = "r3 consumed 45 bytes, r1 consumed 20 bytes"; + let res = downcasted.report_top(2); + assert_eq!( + res, expected, + "should provide list of top memory consumers, instead found {:?}", + res + ); + } } From 6e2ff2955d96cacba905d24993353c7e5fe0cf93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 1 Aug 2024 23:16:05 +0800 Subject: [PATCH 192/357] Derive Debug for logical plan nodes (#11757) --- datafusion/expr/src/logical_plan/plan.rs | 40 ++++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 54c857a2b7013..6bea1ad948a16 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1895,7 +1895,7 @@ impl ToStringifiedPlan for LogicalPlan { } /// Produces no rows: An empty relation with an empty schema -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct EmptyRelation { /// Whether to produce a placeholder row pub produce_one_row: bool, @@ -1925,7 +1925,7 @@ pub struct EmptyRelation { /// intermediate table, then empty the intermediate table. /// /// [Postgres Docs]: https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-RECURSIVE -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct RecursiveQuery { /// Name of the query pub name: String, @@ -1942,7 +1942,7 @@ pub struct RecursiveQuery { /// Values expression. 
See /// [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html) /// documentation for more details. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Values { /// The table schema pub schema: DFSchemaRef, @@ -2023,7 +2023,7 @@ pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result, @@ -2368,7 +2368,7 @@ impl TableScan { } /// Apply Cross Join to two logical plans -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CrossJoin { /// Left input pub left: Arc, @@ -2379,7 +2379,7 @@ pub struct CrossJoin { } /// Repartition the plan based on a partitioning scheme. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Repartition { /// The incoming logical plan pub input: Arc, @@ -2388,7 +2388,7 @@ pub struct Repartition { } /// Union multiple inputs -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Union { /// Inputs to merge pub inputs: Vec>, @@ -2398,7 +2398,7 @@ pub struct Union { /// Prepare a statement but do not execute it. Prepare statements can have 0 or more /// `Expr::Placeholder` expressions that are filled in during execution -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Prepare { /// The name of the statement pub name: String, @@ -2430,7 +2430,7 @@ pub struct Prepare { /// | parent_span_id | Utf8 | YES | /// +--------------------+-----------------------------+-------------+ /// ``` -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DescribeTable { /// Table schema pub schema: Arc, @@ -2440,7 +2440,7 @@ pub struct DescribeTable { /// Produces a relation with string representations of /// various parts of the plan -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Explain { /// Should extra (detailed, intermediate plans) be included? pub verbose: bool, @@ -2456,7 +2456,7 @@ pub struct Explain { /// Runs the actual plan, and then prints the physical plan with /// with execution metrics. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Analyze { /// Should extra detail be included? pub verbose: bool, @@ -2471,7 +2471,7 @@ pub struct Analyze { // the manual `PartialEq` is removed in favor of a derive. // (see `PartialEq` the impl for details.) #[allow(clippy::derived_hash_with_manual_eq)] -#[derive(Clone, Eq, Hash)] +#[derive(Debug, Clone, Eq, Hash)] pub struct Extension { /// The runtime extension operator pub node: Arc, @@ -2487,7 +2487,7 @@ impl PartialEq for Extension { } /// Produces the first `n` tuples from its input and discards the rest. 
-#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Limit { /// Number of rows to skip before fetch pub skip: usize, @@ -2499,7 +2499,7 @@ pub struct Limit { } /// Removes duplicate rows from the input -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Distinct { /// Plain `DISTINCT` referencing all selection expressions All(Arc), @@ -2518,7 +2518,7 @@ impl Distinct { } /// Removes duplicate rows from the input -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DistinctOn { /// The `DISTINCT ON` clause expression list pub on_expr: Vec, @@ -2604,7 +2604,7 @@ impl DistinctOn { /// Aggregates its input based on a set of grouping and aggregate /// expressions (e.g. SUM). -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] // mark non_exhaustive to encourage use of try_new/new() #[non_exhaustive] pub struct Aggregate { @@ -2767,7 +2767,7 @@ fn calc_func_dependencies_for_project( } /// Sorts its input according to a list of sort expressions. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Sort { /// The sort expressions pub expr: Vec, @@ -2778,7 +2778,7 @@ pub struct Sort { } /// Join two logical plans on one or more join columns -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Join { /// Left input pub left: Arc, From 45b40c711e94b167fdb372694f082e4c7cc5c673 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 12:36:20 -0400 Subject: [PATCH 193/357] Minor: add "clickbench extended" queries to unit tests (#11763) --- benchmarks/queries/clickbench/README.md | 2 +- .../sqllogictest/test_files/clickbench.slt | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md index 29b1a7588f17f..560b54181d5ff 100644 --- a/benchmarks/queries/clickbench/README.md +++ b/benchmarks/queries/clickbench/README.md @@ -14,7 +14,7 @@ ClickBench is focused on aggregation and filtering performance (though it has no The "extended" queries are not part of the official ClickBench benchmark. Instead they are used to test other DataFusion features that are not covered by -the standard benchmark Each description below is for the corresponding line in +the standard benchmark. Each description below is for the corresponding line in `extended.sql` (line 1 is `Q0`, line 2 is `Q1`, etc.) 
### Q0: Data Exploration diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index c2dba435263d9..733c0a3cd9728 100644 --- a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -274,5 +274,23 @@ query PI SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-14' AND "EventDate"::INT::DATE <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; ---- +# Clickbench "Extended" queries that test count distinct + +query III +SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; +---- +1 1 1 + +query III +SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; +---- +1 1 1 + +query TIIII +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; +---- +� 1 1 1 1 + + statement ok drop table hits; From 0d98b997436c6ed131b972370edfdb787881899b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 1 Aug 2024 16:13:43 -0400 Subject: [PATCH 194/357] Minor: Add comment explaining rationale for hash check (#11750) --- .../physical-plan/src/aggregates/group_values/row.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 9f05da7cff533..dc948e28bb2d7 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -122,10 +122,14 @@ impl GroupValues for GroupValuesRows { for (row, &target_hash) in batch_hashes.iter().enumerate() { let entry = self.map.get_mut(target_hash, |(exist_hash, group_idx)| { - // verify that a group that we are inserting with hash is - // actually the same key value as the group in - // existing_idx (aka group_values @ row) + // Somewhat surprisingly, this closure can be called even if the + // hash doesn't match, so check the hash first with an integer + // comparison first avoid the more expensive comparison with + // group value. 
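The pattern this comment describes — reject a candidate entry with a cheap `u64` hash comparison before paying for the full group-key comparison — can be shown in isolation. The sketch below is illustrative only, assuming hashbrown 0.14 with the `raw` feature (the same `RawTable` API used by this file); the side table of string keys and the toy hash values are invented:

```rust
use hashbrown::raw::RawTable; // hashbrown 0.14, `raw` feature enabled

/// Entries are `(hash, group_idx)` pairs; the group keys live in a side table.
fn find_group(
    map: &mut RawTable<(u64, usize)>,
    target_hash: u64,
    probe_key: &str,
    group_keys: &[String],
) -> Option<usize> {
    map.get_mut(target_hash, |(stored_hash, group_idx)| {
        // cheap integer comparison first: the closure may run for entries
        // whose full hash differs from `target_hash`
        *stored_hash == target_hash
            // only then compare the (possibly long) key material
            && group_keys[*group_idx] == probe_key
    })
    .map(|(_, group_idx)| *group_idx)
}

fn main() {
    let group_keys = vec!["alpha".to_string(), "beta".to_string()];
    let mut map: RawTable<(u64, usize)> = RawTable::new();
    for (idx, hash) in [11_u64, 22_u64].into_iter().enumerate() {
        map.insert(hash, (hash, idx), |(stored_hash, _)| *stored_hash);
    }
    assert_eq!(find_group(&mut map, 22, "beta", &group_keys), Some(1));
    assert_eq!(find_group(&mut map, 22, "gamma", &group_keys), None);
}
```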
https://github.com/apache/datafusion/pull/11718 target_hash == *exist_hash + // verify that the group that we are inserting with hash is + // actually the same key value as the group in + // existing_idx (aka group_values @ row) && group_rows.row(row) == group_values.row(*group_idx) }); From f044bc8371d5b4e1e51a9026f3eccac16a6d4648 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Fri, 2 Aug 2024 04:33:05 +0800 Subject: [PATCH 195/357] Fix bug that `COUNT(DISTINCT)` on StringView panics (#11768) * fix bug * Add test showing panic on string view --------- Co-authored-by: Andrew Lamb --- datafusion/functions-aggregate/src/count.rs | 5 ++- .../sqllogictest/test_files/string_view.slt | 34 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index e2d59003fca14..64eb7253f5c9d 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -237,7 +237,7 @@ impl AggregateUDFImpl for Count { Box::new(BytesDistinctCountAccumulator::::new(OutputType::Utf8)) } DataType::Utf8View => { - Box::new(BytesViewDistinctCountAccumulator::new(OutputType::Utf8)) + Box::new(BytesViewDistinctCountAccumulator::new(OutputType::Utf8View)) } DataType::LargeUtf8 => { Box::new(BytesDistinctCountAccumulator::::new(OutputType::Utf8)) @@ -245,6 +245,9 @@ impl AggregateUDFImpl for Count { DataType::Binary => Box::new(BytesDistinctCountAccumulator::::new( OutputType::Binary, )), + DataType::BinaryView => Box::new(BytesViewDistinctCountAccumulator::new( + OutputType::BinaryView, + )), DataType::LargeBinary => Box::new(BytesDistinctCountAccumulator::::new( OutputType::Binary, )), diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3f9a4793f655d..763b4e99c6145 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -321,6 +321,40 @@ logical_plan 02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view 03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] +## Test distinct aggregates +query III +SELECT + COUNT(DISTINCT column1_utf8), + COUNT(DISTINCT column1_utf8view), + COUNT(DISTINCT column1_dict) +FROM test; +---- +3 3 3 + +query III +SELECT + COUNT(DISTINCT column1_utf8), + COUNT(DISTINCT column1_utf8view), + COUNT(DISTINCT column1_dict) +FROM test +GROUP BY column2_utf8view; +---- +1 1 1 +1 1 1 +1 1 1 + + +query TT +EXPLAIN SELECT + COUNT(DISTINCT column1_utf8), + COUNT(DISTINCT column1_utf8view), + COUNT(DISTINCT column1_dict) +FROM test; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8), count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]] +02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict] + statement ok drop table test; From a0ad376840daac8fdfecee5a4988c585350c629b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Fri, 2 Aug 2024 02:47:27 +0200 Subject: [PATCH 196/357] [Minor] Refactor approx_percentile (#11769) * Refactor approx_percentile * Refactor approx_percentile * Types * Types * Types --- .../functions-aggregate/src/approx_median.rs | 2 +- .../src/approx_percentile_cont.rs | 8 +-- .../src/aggregate/tdigest.rs | 62 +++++++++++-------- 3 files changed, 41 insertions(+), 31 deletions(-) diff --git a/datafusion/functions-aggregate/src/approx_median.rs 
b/datafusion/functions-aggregate/src/approx_median.rs index e12e3445a83ed..c386ad89f0fb7 100644 --- a/datafusion/functions-aggregate/src/approx_median.rs +++ b/datafusion/functions-aggregate/src/approx_median.rs @@ -78,7 +78,7 @@ impl AggregateUDFImpl for ApproxMedian { Ok(vec![ Field::new(format_state_name(args.name, "max_size"), UInt64, false), Field::new(format_state_name(args.name, "sum"), Float64, false), - Field::new(format_state_name(args.name, "count"), Float64, false), + Field::new(format_state_name(args.name, "count"), UInt64, false), Field::new(format_state_name(args.name, "max"), Float64, false), Field::new(format_state_name(args.name, "min"), Float64, false), Field::new_list( diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 844e48f0a44dc..af2a26fd05ece 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -214,7 +214,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { ), Field::new( format_state_name(args.name, "count"), - DataType::Float64, + DataType::UInt64, false, ), Field::new( @@ -406,7 +406,7 @@ impl Accumulator for ApproxPercentileAccumulator { } fn evaluate(&mut self) -> datafusion_common::Result { - if self.digest.count() == 0.0 { + if self.digest.count() == 0 { return ScalarValue::try_from(self.return_type.clone()); } let q = self.digest.estimate_quantile(self.percentile); @@ -487,8 +487,8 @@ mod tests { ApproxPercentileAccumulator::new_with_max_size(0.5, DataType::Float64, 100); accumulator.merge_digests(&[t1]); - assert_eq!(accumulator.digest.count(), 50_000.0); + assert_eq!(accumulator.digest.count(), 50_000); accumulator.merge_digests(&[t2]); - assert_eq!(accumulator.digest.count(), 100_000.0); + assert_eq!(accumulator.digest.count(), 100_000); } } diff --git a/datafusion/physical-expr-common/src/aggregate/tdigest.rs b/datafusion/physical-expr-common/src/aggregate/tdigest.rs index 1da3d7180d84c..070ebc46483b6 100644 --- a/datafusion/physical-expr-common/src/aggregate/tdigest.rs +++ b/datafusion/physical-expr-common/src/aggregate/tdigest.rs @@ -47,6 +47,17 @@ macro_rules! cast_scalar_f64 { }; } +// Cast a non-null [`ScalarValue::UInt64`] to an [`u64`], or +// panic. +macro_rules! cast_scalar_u64 { + ($value:expr ) => { + match &$value { + ScalarValue::UInt64(Some(v)) => *v, + v => panic!("invalid type {:?}", v), + } + }; +} + /// This trait is implemented for each type a [`TDigest`] can operate on, /// allowing it to support both numerical rust types (obtained from /// `PrimitiveArray` instances), and [`ScalarValue`] instances. 
@@ -142,7 +153,7 @@ pub struct TDigest { centroids: Vec, max_size: usize, sum: f64, - count: f64, + count: u64, max: f64, min: f64, } @@ -153,7 +164,7 @@ impl TDigest { centroids: Vec::new(), max_size, sum: 0_f64, - count: 0_f64, + count: 0, max: f64::NAN, min: f64::NAN, } @@ -164,14 +175,14 @@ impl TDigest { centroids: vec![centroid.clone()], max_size, sum: centroid.mean * centroid.weight, - count: 1_f64, + count: 1, max: centroid.mean, min: centroid.mean, } } #[inline] - pub fn count(&self) -> f64 { + pub fn count(&self) -> u64 { self.count } @@ -203,7 +214,7 @@ impl Default for TDigest { centroids: Vec::new(), max_size: 100, sum: 0_f64, - count: 0_f64, + count: 0, max: f64::NAN, min: f64::NAN, } @@ -211,8 +222,8 @@ impl Default for TDigest { } impl TDigest { - fn k_to_q(k: f64, d: f64) -> f64 { - let k_div_d = k / d; + fn k_to_q(k: u64, d: usize) -> f64 { + let k_div_d = k as f64 / d as f64; if k_div_d >= 0.5 { let base = 1.0 - k_div_d; 1.0 - 2.0 * base * base @@ -244,12 +255,12 @@ impl TDigest { } let mut result = TDigest::new(self.max_size()); - result.count = self.count() + (sorted_values.len() as f64); + result.count = self.count() + sorted_values.len() as u64; let maybe_min = *sorted_values.first().unwrap(); let maybe_max = *sorted_values.last().unwrap(); - if self.count() > 0.0 { + if self.count() > 0 { result.min = self.min.min(maybe_min); result.max = self.max.max(maybe_max); } else { @@ -259,10 +270,10 @@ impl TDigest { let mut compressed: Vec = Vec::with_capacity(self.max_size); - let mut k_limit: f64 = 1.0; + let mut k_limit: u64 = 1; let mut q_limit_times_count = - Self::k_to_q(k_limit, self.max_size as f64) * result.count(); - k_limit += 1.0; + Self::k_to_q(k_limit, self.max_size) * result.count() as f64; + k_limit += 1; let mut iter_centroids = self.centroids.iter().peekable(); let mut iter_sorted_values = sorted_values.iter().peekable(); @@ -309,8 +320,8 @@ impl TDigest { compressed.push(curr.clone()); q_limit_times_count = - Self::k_to_q(k_limit, self.max_size as f64) * result.count(); - k_limit += 1.0; + Self::k_to_q(k_limit, self.max_size) * result.count() as f64; + k_limit += 1; curr = next; } } @@ -381,7 +392,7 @@ impl TDigest { let mut centroids: Vec = Vec::with_capacity(n_centroids); let mut starts: Vec = Vec::with_capacity(digests.len()); - let mut count: f64 = 0.0; + let mut count = 0; let mut min = f64::INFINITY; let mut max = f64::NEG_INFINITY; @@ -389,8 +400,8 @@ impl TDigest { for digest in digests.iter() { starts.push(start); - let curr_count: f64 = digest.count(); - if curr_count > 0.0 { + let curr_count = digest.count(); + if curr_count > 0 { min = min.min(digest.min); max = max.max(digest.max); count += curr_count; @@ -424,8 +435,8 @@ impl TDigest { let mut result = TDigest::new(max_size); let mut compressed: Vec = Vec::with_capacity(max_size); - let mut k_limit: f64 = 1.0; - let mut q_limit_times_count = Self::k_to_q(k_limit, max_size as f64) * (count); + let mut k_limit = 1; + let mut q_limit_times_count = Self::k_to_q(k_limit, max_size) * count as f64; let mut iter_centroids = centroids.iter_mut(); let mut curr = iter_centroids.next().unwrap(); @@ -444,8 +455,8 @@ impl TDigest { sums_to_merge = 0_f64; weights_to_merge = 0_f64; compressed.push(curr.clone()); - q_limit_times_count = Self::k_to_q(k_limit, max_size as f64) * (count); - k_limit += 1.0; + q_limit_times_count = Self::k_to_q(k_limit, max_size) * count as f64; + k_limit += 1; curr = centroid; } } @@ -468,8 +479,7 @@ impl TDigest { return 0.0; } - let count_ = self.count; - let rank = q * 
count_; + let rank = q * self.count as f64; let mut pos: usize; let mut t; @@ -479,7 +489,7 @@ impl TDigest { } pos = 0; - t = count_; + t = self.count as f64; for (k, centroid) in self.centroids.iter().enumerate().rev() { t -= centroid.weight(); @@ -581,7 +591,7 @@ impl TDigest { vec![ ScalarValue::UInt64(Some(self.max_size as u64)), ScalarValue::Float64(Some(self.sum)), - ScalarValue::Float64(Some(self.count)), + ScalarValue::UInt64(Some(self.count)), ScalarValue::Float64(Some(self.max)), ScalarValue::Float64(Some(self.min)), ScalarValue::List(arr), @@ -627,7 +637,7 @@ impl TDigest { Self { max_size, sum: cast_scalar_f64!(state[1]), - count: cast_scalar_f64!(&state[2]), + count: cast_scalar_u64!(&state[2]), max, min, centroids, From 70aba2bd6c0351f780157f2595a51ac4a9df56e2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 2 Aug 2024 00:11:59 -0600 Subject: [PATCH 197/357] minor: always time batch_filter even when the result is an empty batch (#11775) --- datafusion/physical-plan/src/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 67de0989649e6..69bcfefcd4764 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -380,11 +380,11 @@ impl Stream for FilterExecStream { Some(Ok(batch)) => { let timer = self.baseline_metrics.elapsed_compute().timer(); let filtered_batch = batch_filter(&batch, &self.predicate)?; + timer.done(); // skip entirely filtered batches if filtered_batch.num_rows() == 0 { continue; } - timer.done(); poll = Poll::Ready(Some(Ok(filtered_batch))); break; } From d010ce90f40f2866904a4eea563afbbff72497cc Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 1 Aug 2024 23:12:23 -0700 Subject: [PATCH 198/357] refactor(11523): update OOM message provided for a single failed reservation (#11771) --- datafusion/execution/src/memory_pool/pool.rs | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index 9cb6f207e59cd..4a41602bd961f 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -246,7 +246,7 @@ fn insufficient_capacity_err( additional: usize, available: usize, ) -> DataFusionError { - resources_datafusion_err!("Failed to allocate additional {} bytes for {} with {} bytes already allocated - maximum available is {}", additional, reservation.registration.consumer.name, reservation.size, available) + resources_datafusion_err!("Failed to allocate additional {} bytes for {} with {} bytes already allocated for this reservation - {} bytes remain available for the total pool", additional, reservation.registration.consumer.name, reservation.size, available) } /// A [`MemoryPool`] that tracks the consumers that have @@ -418,10 +418,10 @@ mod tests { assert_eq!(pool.reserved(), 4000); let err = r2.try_grow(1).unwrap_err().strip_backtrace(); - assert_eq!(err, "Resources exhausted: Failed to allocate additional 1 bytes for r2 with 2000 bytes already allocated - maximum available is 0"); + assert_eq!(err, "Resources exhausted: Failed to allocate additional 1 bytes for r2 with 2000 bytes already allocated for this reservation - 0 bytes remain available for the total pool"); let err = r2.try_grow(1).unwrap_err().strip_backtrace(); - assert_eq!(err, "Resources exhausted: Failed to allocate additional 1 bytes for r2 with 2000 bytes already allocated - 
maximum available is 0"); + assert_eq!(err, "Resources exhausted: Failed to allocate additional 1 bytes for r2 with 2000 bytes already allocated for this reservation - 0 bytes remain available for the total pool"); r1.shrink(1990); r2.shrink(2000); @@ -446,12 +446,12 @@ mod tests { .register(&pool); let err = r3.try_grow(70).unwrap_err().strip_backtrace(); - assert_eq!(err, "Resources exhausted: Failed to allocate additional 70 bytes for r3 with 0 bytes already allocated - maximum available is 40"); + assert_eq!(err, "Resources exhausted: Failed to allocate additional 70 bytes for r3 with 0 bytes already allocated for this reservation - 40 bytes remain available for the total pool"); //Shrinking r2 to zero doesn't allow a3 to allocate more than 45 r2.free(); let err = r3.try_grow(70).unwrap_err().strip_backtrace(); - assert_eq!(err, "Resources exhausted: Failed to allocate additional 70 bytes for r3 with 0 bytes already allocated - maximum available is 40"); + assert_eq!(err, "Resources exhausted: Failed to allocate additional 70 bytes for r3 with 0 bytes already allocated for this reservation - 40 bytes remain available for the total pool"); // But dropping r2 does drop(r2); @@ -464,7 +464,7 @@ mod tests { let mut r4 = MemoryConsumer::new("s4").register(&pool); let err = r4.try_grow(30).unwrap_err().strip_backtrace(); - assert_eq!(err, "Resources exhausted: Failed to allocate additional 30 bytes for s4 with 0 bytes already allocated - maximum available is 20"); + assert_eq!(err, "Resources exhausted: Failed to allocate additional 30 bytes for s4 with 0 bytes already allocated for this reservation - 20 bytes remain available for the total pool"); } #[test] @@ -501,7 +501,7 @@ mod tests { // Test: reports if new reservation causes error // using the previously set sizes for other consumers let mut r5 = MemoryConsumer::new("r5").register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 50 bytes, r3 consumed 20 bytes, r2 consumed 15 bytes. Error: Failed to allocate additional 150 bytes for r5 with 0 bytes already allocated - maximum available is 5"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 50 bytes, r3 consumed 20 bytes, r2 consumed 15 bytes. Error: Failed to allocate additional 150 bytes for r5 with 0 bytes already allocated for this reservation - 5 bytes remain available for the total pool"; let res = r5.try_grow(150); assert!( matches!( @@ -524,7 +524,7 @@ mod tests { // Test: see error message when no consumers recorded yet let mut r0 = MemoryConsumer::new(same_name).register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 100"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 100 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( @@ -543,7 +543,7 @@ mod tests { let mut r1 = new_consumer_same_name.clone().register(&pool); // TODO: the insufficient_capacity_err() message is per reservation, not per consumer. 
// a followup PR will clarify this message "0 bytes already allocated for this reservation" - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 10 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 90"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 10 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; let res = r1.try_grow(150); assert!( matches!( @@ -555,7 +555,7 @@ mod tests { // Test: will accumulate size changes per consumer, not per reservation r1.grow(20); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 30 bytes. Error: Failed to allocate additional 150 bytes for foo with 20 bytes already allocated - maximum available is 70"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 30 bytes. Error: Failed to allocate additional 150 bytes for foo with 20 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r1.try_grow(150); assert!( matches!( @@ -570,7 +570,7 @@ mod tests { let consumer_with_same_name_but_different_hash = MemoryConsumer::new(same_name).with_can_spill(true); let mut r2 = consumer_with_same_name_but_different_hash.register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo(can_spill=false) consumed 30 bytes, foo(can_spill=true) consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated - maximum available is 70"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: foo(can_spill=false) consumed 30 bytes, foo(can_spill=true) consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r2.try_grow(150); assert!( matches!( @@ -590,7 +590,7 @@ mod tests { let r1_consumer = MemoryConsumer::new("r1"); let mut r1 = r1_consumer.clone().register(&pool); r1.grow(20); - let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 20 bytes, r0 consumed 10 bytes. Error: Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 70"; + let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 20 bytes, r0 consumed 10 bytes. Error: Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( @@ -616,7 +616,7 @@ mod tests { // Test: actual message we see is the `available is 70`. When it should be `available is 90`. // This is because the pool.shrink() does not automatically occur within the inner_pool.deregister(). 
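For context, every string asserted in these tests comes from an ordinary `try_grow` call on a reservation. A minimal standalone sketch of how the new wording surfaces to a user (the pool size and consumer names here are invented, not taken from the tests):

```rust
use std::sync::Arc;
use datafusion_execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};

fn main() {
    // a 100 byte budget shared by every consumer registered against the pool
    let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(100));

    let mut build_side = MemoryConsumer::new("build_side").register(&pool);
    build_side.try_grow(60).unwrap(); // succeeds; 40 bytes remain in the pool

    let mut sorter = MemoryConsumer::new("sorter").register(&pool);
    // fails: the message names the consumer, the 0 bytes already allocated
    // for this reservation, and the 40 bytes remaining in the total pool
    let err = sorter.try_grow(50).unwrap_err();
    println!("{err}");
}
```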
- let expected_70_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 70"; + let expected_70_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( @@ -629,7 +629,7 @@ mod tests { // Test: the registration needs to free itself (or be dropped), // for the proper error message r1.free(); - let expected_90_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated - maximum available is 90"; + let expected_90_available = "Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( From df4e6cc4e59fd41b88433a84cc7a9f519ea0ebc4 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 2 Aug 2024 11:55:48 +0200 Subject: [PATCH 199/357] [Minor] Short circuit `ApplyFunctionRewrites` if there are no function rewrites (#11765) * Short circuit ApplyFunctionRewrites if there are no function rewrites * Short circuit ApplyFunctionRewrites in the Analyzer itself --- datafusion/optimizer/src/analyzer/mod.rs | 12 +++++++++--- datafusion/sqllogictest/test_files/explain.slt | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index 32bb2bc70452a..91ee8a9e1033a 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -136,9 +136,15 @@ impl Analyzer { // Note this is run before all other rules since it rewrites based on // the argument types (List or Scalar), and TypeCoercion may cast the // argument types from Scalar to List. 
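The short-circuit introduced just below keeps the optional `ApplyFunctionRewrites` pass in an `Option` and chains it in front of the fixed rule list, so nothing is constructed or run when there are no rewrites. A stripped-down sketch of that iterator pattern (rule names here are plain strings for illustration):

```rust
fn run_rules(extra: Option<&str>, rules: &[&str]) {
    // `Option::iter()` yields zero or one item, so when `extra` is `None`
    // the loop covers `rules` alone and the optional pass is skipped entirely
    for rule in extra.iter().chain(rules.iter()) {
        println!("applying rule: {rule}");
    }
}

fn main() {
    run_rules(None, &["type_coercion", "count_wildcard_rule"]);
    run_rules(Some("apply_function_rewrites"), &["type_coercion"]);
}
```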
- let expr_to_function: Arc = - Arc::new(ApplyFunctionRewrites::new(self.function_rewrites.clone())); - let rules = std::iter::once(&expr_to_function).chain(self.rules.iter()); + let expr_to_function: Option> = + if self.function_rewrites.is_empty() { + None + } else { + Some(Arc::new(ApplyFunctionRewrites::new( + self.function_rewrites.clone(), + ))) + }; + let rules = expr_to_function.iter().chain(self.rules.iter()); // TODO add common rule executor for Analyzer and Optimizer for rule in rules { diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 5a17334601208..eae4f428b4b42 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -176,7 +176,6 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test initial_logical_plan 01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c 02)--TableScan: simple_explain_test -logical_plan after apply_function_rewrites SAME TEXT AS ABOVE logical_plan after inline_table_scan SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE logical_plan after count_wildcard_rule SAME TEXT AS ABOVE From 80848f2a0a08b61be441ac37a00073d4447e9603 Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Fri, 2 Aug 2024 20:35:04 +1000 Subject: [PATCH 200/357] Fix #11692: Improve doc comments within macros (#11694) * Fix #11692: Improve doc comments within macros * Fix doc errors --------- Co-authored-by: Andrew Lamb --- datafusion/expr/src/test/function_stub.rs | 6 ++---- datafusion/functions-aggregate/src/macros.rs | 6 ++---- datafusion/functions-nested/src/macros.rs | 6 +++--- datafusion/functions/src/macros.rs | 5 ++--- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index 14a6522ebe91e..3e0760b5c0dec 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -44,11 +44,9 @@ macro_rules! create_func { /// named STATIC_$(UDAF). For example `STATIC_FirstValue` #[allow(non_upper_case_globals)] static [< STATIC_ $UDAF >]: std::sync::OnceLock> = - std::sync::OnceLock::new(); + std::sync::OnceLock::new(); - /// AggregateFunction that returns a [AggregateUDF] for [$UDAF] - /// - /// [AggregateUDF]: crate::AggregateUDF + #[doc = concat!("AggregateFunction that returns a [AggregateUDF](crate::AggregateUDF) for [`", stringify!($UDAF), "`]")] pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc { [< STATIC_ $UDAF >] .get_or_init(|| { diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs index cae72cf352238..573b9fd5bdb2f 100644 --- a/datafusion/functions-aggregate/src/macros.rs +++ b/datafusion/functions-aggregate/src/macros.rs @@ -86,11 +86,9 @@ macro_rules! create_func { /// named STATIC_$(UDAF). 
For example `STATIC_FirstValue` #[allow(non_upper_case_globals)] static [< STATIC_ $UDAF >]: std::sync::OnceLock> = - std::sync::OnceLock::new(); + std::sync::OnceLock::new(); - /// AggregateFunction that returns a [AggregateUDF] for [$UDAF] - /// - /// [AggregateUDF]: datafusion_expr::AggregateUDF + #[doc = concat!("AggregateFunction that returns a [`AggregateUDF`](datafusion_expr::AggregateUDF) for [`", stringify!($UDAF), "`]")] pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc { [< STATIC_ $UDAF >] .get_or_init(|| { diff --git a/datafusion/functions-nested/src/macros.rs b/datafusion/functions-nested/src/macros.rs index a6e0c2ee62be6..00247f39ac10f 100644 --- a/datafusion/functions-nested/src/macros.rs +++ b/datafusion/functions-nested/src/macros.rs @@ -90,9 +90,9 @@ macro_rules! create_func { #[allow(non_upper_case_globals)] static [< STATIC_ $UDF >]: std::sync::OnceLock> = std::sync::OnceLock::new(); - /// ScalarFunction that returns a [`ScalarUDF`] for [`$UDF`] - /// - /// [`ScalarUDF`]: datafusion_expr::ScalarUDF + + #[doc = concat!("ScalarFunction that returns a [`ScalarUDF`](datafusion_expr::ScalarUDF) for ")] + #[doc = stringify!($UDF)] pub fn $SCALAR_UDF_FN() -> std::sync::Arc { [< STATIC_ $UDF >] .get_or_init(|| { diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index e26c94e1bb799..484afb57f74e0 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -75,9 +75,8 @@ macro_rules! make_udf_function { static $GNAME: std::sync::OnceLock> = std::sync::OnceLock::new(); - /// Return a [`ScalarUDF`] for [`$UDF`] - /// - /// [`ScalarUDF`]: datafusion_expr::ScalarUDF + #[doc = "Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) for "] + #[doc = stringify!($UDF)] pub fn $NAME() -> std::sync::Arc { $GNAME .get_or_init(|| { From 5ca4ec3b59044f08a7b5487de2d146e1b9b3bd29 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 2 Aug 2024 08:05:48 -0400 Subject: [PATCH 201/357] Extract CoalesceBatchesStream to a struct (#11610) --- .../physical-plan/src/coalesce_batches.rs | 602 +++++++++++------- 1 file changed, 382 insertions(+), 220 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 038727daa7d87..b822ec2dafeb1 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -15,13 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! CoalesceBatchesExec combines small batches into larger batches for more efficient use of -//! vectorized processing by upstream operators. +//! [`CoalesceBatchesExec`] combines small batches into larger batches. use std::any::Any; use std::pin::Pin; use std::sync::Arc; -use std::task::{Context, Poll}; +use std::task::{ready, Context, Poll}; use arrow::array::{AsArray, StringViewBuilder}; use arrow::compute::concat_batches; @@ -41,11 +40,43 @@ use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; /// `CoalesceBatchesExec` combines small batches into larger batches for more -/// efficient use of vectorized processing by later operators. The operator -/// works by buffering batches until it collects `target_batch_size` rows. 
When -/// only a limited number of rows are necessary (specified by the `fetch` -/// parameter), the operator will stop buffering and return the final batch -/// once the number of collected rows reaches the `fetch` value. +/// efficient use of vectorized processing by later operators. +/// +/// The operator buffers batches until it collects `target_batch_size` rows and +/// then emits a single concatenated batch. When only a limited number of rows +/// are necessary (specified by the `fetch` parameter), the operator will stop +/// buffering and returns the final batch once the number of collected rows +/// reaches the `fetch` value. +/// +/// # Background +/// +/// Generally speaking, larger RecordBatches are more efficient to process than +/// smaller record batches (until the CPU cache is exceeded) because there is +/// fixed processing overhead per batch. This code concatenates multiple small +/// record batches into larger ones to amortize this overhead. +/// +/// ```text +/// ┌────────────────────┐ +/// │ RecordBatch │ +/// │ num_rows = 23 │ +/// └────────────────────┘ ┌────────────────────┐ +/// │ │ +/// ┌────────────────────┐ Coalesce │ │ +/// │ │ Batches │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ +/// │ │ │ RecordBatch │ +/// │ │ │ num_rows = 106 │ +/// └────────────────────┘ │ │ +/// │ │ +/// ┌────────────────────┐ │ │ +/// │ │ │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 33 │ └────────────────────┘ +/// │ │ +/// └────────────────────┘ +/// ``` + #[derive(Debug)] pub struct CoalesceBatchesExec { /// The input plan @@ -166,12 +197,11 @@ impl ExecutionPlan for CoalesceBatchesExec { ) -> Result { Ok(Box::pin(CoalesceBatchesStream { input: self.input.execute(partition, context)?, - schema: self.input.schema(), - target_batch_size: self.target_batch_size, - fetch: self.fetch, - buffer: Vec::new(), - buffered_rows: 0, - total_rows: 0, + coalescer: BatchCoalescer::new( + self.input.schema(), + self.target_batch_size, + self.fetch, + ), is_closed: false, baseline_metrics: BaselineMetrics::new(&self.metrics, partition), })) @@ -196,21 +226,12 @@ impl ExecutionPlan for CoalesceBatchesExec { } } +/// Stream for [`CoalesceBatchesExec`]. See [`CoalesceBatchesExec`] for more details. struct CoalesceBatchesStream { /// The input plan input: SendableRecordBatchStream, - /// The input schema - schema: SchemaRef, - /// Minimum number of rows for coalesces batches - target_batch_size: usize, - /// Maximum number of rows to fetch, `None` means fetching all rows - fetch: Option, - /// Buffered batches - buffer: Vec, - /// Buffered row count - buffered_rows: usize, - /// Total number of rows returned - total_rows: usize, + /// Buffer for combining batches + coalescer: BatchCoalescer, /// Whether the stream has finished returning all of its data or not is_closed: bool, /// Execution metrics @@ -249,84 +270,178 @@ impl CoalesceBatchesStream { let input_batch = self.input.poll_next_unpin(cx); // records time on drop let _timer = cloned_time.timer(); - match input_batch { - Poll::Ready(x) => match x { - Some(Ok(batch)) => { - let batch = gc_string_view_batch(&batch); - - // Handle fetch limit: - if let Some(fetch) = self.fetch { - if self.total_rows + batch.num_rows() >= fetch { - // We have reached the fetch limit. 
- let remaining_rows = fetch - self.total_rows; - debug_assert!(remaining_rows > 0); - + match ready!(input_batch) { + Some(result) => { + let Ok(input_batch) = result else { + return Poll::Ready(Some(result)); // pass back error + }; + // Buffer the batch and either get more input if not enough + // rows yet or output + match self.coalescer.push_batch(input_batch) { + Ok(None) => continue, + res => { + if self.coalescer.limit_reached() { self.is_closed = true; - self.total_rows = fetch; - // Trim the batch and add to buffered batches: - let batch = batch.slice(0, remaining_rows); - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - // Combine buffered batches: - let batch = concat_batches(&self.schema, &self.buffer)?; - // Reset the buffer state and return final batch: - self.buffer.clear(); - self.buffered_rows = 0; - return Poll::Ready(Some(Ok(batch))); - } - } - self.total_rows += batch.num_rows(); - - if batch.num_rows() >= self.target_batch_size - && self.buffer.is_empty() - { - return Poll::Ready(Some(Ok(batch))); - } else if batch.num_rows() == 0 { - // discard empty batches - } else { - // add to the buffered batches - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - // check to see if we have enough batches yet - if self.buffered_rows >= self.target_batch_size { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - return Poll::Ready(Some(Ok(batch))); } + return Poll::Ready(res.transpose()); } } - None => { - self.is_closed = true; - // we have reached the end of the input stream but there could still - // be buffered batches - if self.buffer.is_empty() { - return Poll::Ready(None); - } else { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - return Poll::Ready(Some(Ok(batch))); - } - } - other => return Poll::Ready(other), - }, - Poll::Pending => return Poll::Pending, + } + None => { + self.is_closed = true; + // we have reached the end of the input stream but there could still + // be buffered batches + return match self.coalescer.finish() { + Ok(None) => Poll::Ready(None), + res => Poll::Ready(res.transpose()), + }; + } } } } } impl RecordBatchStream for CoalesceBatchesStream { + fn schema(&self) -> SchemaRef { + self.coalescer.schema() + } +} + +/// Concatenate multiple record batches into larger batches +/// +/// See [`CoalesceBatchesExec`] for more details. +/// +/// Notes: +/// +/// 1. The output rows is the same order as the input rows +/// +/// 2. The output is a sequence of batches, with all but the last being at least +/// `target_batch_size` rows. +/// +/// 3. Eventually this may also be able to handle other optimizations such as a +/// combined filter/coalesce operation. 
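The accumulate-then-concatenate contract described above is small enough to show end to end. A minimal standalone sketch using only `arrow`'s `concat_batches` (not the types from this file), with a toy `Int32` schema:

```rust
use std::sync::Arc;
use arrow::array::Int32Array;
use arrow::compute::concat_batches;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

/// Buffer small batches until at least `target` rows are held, then emit one
/// concatenated batch; a final, possibly smaller batch flushes the remainder.
/// Assumes at least one input batch.
fn coalesce(
    batches: Vec<RecordBatch>,
    target: usize,
) -> Result<Vec<RecordBatch>, ArrowError> {
    let schema = batches[0].schema();
    let (mut out, mut buf, mut rows) = (Vec::new(), Vec::new(), 0);
    for batch in batches {
        rows += batch.num_rows();
        buf.push(batch);
        if rows >= target {
            out.push(concat_batches(&schema, &buf)?);
            buf.clear();
            rows = 0;
        }
    }
    if !buf.is_empty() {
        out.push(concat_batches(&schema, &buf)?);
    }
    Ok(out)
}

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)]));
    let small = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    let sizes: Vec<usize> = coalesce(vec![small.clone(), small.clone(), small], 5)?
        .iter()
        .map(|b| b.num_rows())
        .collect();
    assert_eq!(sizes, vec![6, 3]); // one coalesced batch plus the remainder
    Ok(())
}
```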
+#[derive(Debug)] +struct BatchCoalescer { + /// The input schema + schema: SchemaRef, + /// Minimum number of rows for coalesces batches + target_batch_size: usize, + /// Total number of rows returned so far + total_rows: usize, + /// Buffered batches + buffer: Vec, + /// Buffered row count + buffered_rows: usize, + /// Maximum number of rows to fetch, `None` means fetching all rows + fetch: Option, +} + +impl BatchCoalescer { + /// Create a new `BatchCoalescer` + /// + /// # Arguments + /// - `schema` - the schema of the output batches + /// - `target_batch_size` - the minimum number of rows for each + /// output batch (until limit reached) + /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows + fn new(schema: SchemaRef, target_batch_size: usize, fetch: Option) -> Self { + Self { + schema, + target_batch_size, + total_rows: 0, + buffer: vec![], + buffered_rows: 0, + fetch, + } + } + + /// Return the schema of the output batches fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } + + /// Add a batch, returning a batch if the target batch size or limit is reached + fn push_batch(&mut self, batch: RecordBatch) -> Result> { + // discard empty batches + if batch.num_rows() == 0 { + return Ok(None); + } + + // past limit + if self.limit_reached() { + return Ok(None); + } + + let batch = gc_string_view_batch(&batch); + + // Handle fetch limit: + if let Some(fetch) = self.fetch { + if self.total_rows + batch.num_rows() >= fetch { + // We have reached the fetch limit. + let remaining_rows = fetch - self.total_rows; + debug_assert!(remaining_rows > 0); + self.total_rows = fetch; + // Trim the batch and add to buffered batches: + let batch = batch.slice(0, remaining_rows); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + // Combine buffered batches: + let batch = concat_batches(&self.schema, &self.buffer)?; + // Reset the buffer state and return final batch: + self.buffer.clear(); + self.buffered_rows = 0; + return Ok(Some(batch)); + } + } + self.total_rows += batch.num_rows(); + + // batch itself is already big enough and we have no buffered rows so + // return it directly + if batch.num_rows() >= self.target_batch_size && self.buffer.is_empty() { + return Ok(Some(batch)); + } + // add to the buffered batches + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + // check to see if we have enough batches yet + let batch = if self.buffered_rows >= self.target_batch_size { + // combine the batches and return + let batch = concat_batches(&self.schema, &self.buffer)?; + // reset buffer state + self.buffer.clear(); + self.buffered_rows = 0; + // return batch + Some(batch) + } else { + None + }; + Ok(batch) + } + + /// Finish the coalescing process, returning all buffered data as a final, + /// single batch, if any + fn finish(&mut self) -> Result> { + if self.buffer.is_empty() { + Ok(None) + } else { + // combine the batches and return + let batch = concat_batches(&self.schema, &self.buffer)?; + // reset buffer state + self.buffer.clear(); + self.buffered_rows = 0; + // return batch + Ok(Some(batch)) + } + } + + /// returns true if there is a limit and it has been reached + pub fn limit_reached(&self) -> bool { + if let Some(fetch) = self.fetch { + self.total_rows >= fetch + } else { + false + } + } } /// Heuristically compact `StringViewArray`s to reduce memory usage, if needed @@ -400,164 +515,206 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { #[cfg(test)] mod tests { + use super::*; use 
arrow::datatypes::{DataType, Field, Schema}; use arrow_array::builder::ArrayBuilder; use arrow_array::{StringViewArray, UInt32Array}; + use std::ops::Range; - use crate::{memory::MemoryExec, repartition::RepartitionExec, Partitioning}; - - use super::*; - - #[tokio::test(flavor = "multi_thread")] - async fn test_concat_batches() -> Result<()> { - let schema = test_schema(); - let partition = create_vec_batches(&schema, 10); - let partitions = vec![partition]; - - let output_partitions = coalesce_batches(&schema, partitions, 21, None).await?; - assert_eq!(1, output_partitions.len()); - - // input is 10 batches x 8 rows (80 rows) - // expected output is batches of at least 20 rows (except for the final batch) - let batches = &output_partitions[0]; - assert_eq!(4, batches.len()); - assert_eq!(24, batches[0].num_rows()); - assert_eq!(24, batches[1].num_rows()); - assert_eq!(24, batches[2].num_rows()); - assert_eq!(8, batches[3].num_rows()); - - Ok(()) + #[test] + fn test_coalesce() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // expected output is batches of at least 20 rows (except for the final batch) + .with_target_batch_size(21) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run() } - #[tokio::test] - async fn test_concat_batches_with_fetch_larger_than_input_size() -> Result<()> { - let schema = test_schema(); - let partition = create_vec_batches(&schema, 10); - let partitions = vec![partition]; - - let output_partitions = - coalesce_batches(&schema, partitions, 21, Some(100)).await?; - assert_eq!(1, output_partitions.len()); + #[test] + fn test_coalesce_with_fetch_larger_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 + // expected to behave the same as `test_concat_batches` + .with_target_batch_size(21) + .with_fetch(Some(100)) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run(); + } - // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 - // expected to behave the same as `test_concat_batches` - let batches = &output_partitions[0]; - assert_eq!(4, batches.len()); - assert_eq!(24, batches[0].num_rows()); - assert_eq!(24, batches[1].num_rows()); - assert_eq!(24, batches[2].num_rows()); - assert_eq!(8, batches[3].num_rows()); + #[test] + fn test_coalesce_with_fetch_less_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 + .with_target_batch_size(21) + .with_fetch(Some(50)) + .with_expected_output_sizes(vec![24, 24, 2]) + .run(); + } - Ok(()) + #[test] + fn test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 + .with_target_batch_size(21) + .with_fetch(Some(48)) + .with_expected_output_sizes(vec![24, 24]) + .run(); } - #[tokio::test] - async fn test_concat_batches_with_fetch_less_than_input_size() -> Result<()> { - let schema = test_schema(); - let partition = create_vec_batches(&schema, 10); - let partitions = vec![partition]; + #[test] + fn test_coalesce_with_fetch_less_target_batch_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 + 
.with_target_batch_size(21) + .with_fetch(Some(10)) + .with_expected_output_sizes(vec![10]) + .run(); + } - let output_partitions = - coalesce_batches(&schema, partitions, 21, Some(50)).await?; - assert_eq!(1, output_partitions.len()); + #[test] + fn test_coalesce_single_large_batch_over_fetch() { + let large_batch = uint32_batch(0..100); + Test::new() + .with_batch(large_batch) + .with_target_batch_size(20) + .with_fetch(Some(7)) + .with_expected_output_sizes(vec![7]) + .run() + } + + /// Test for [`BatchCoalescer`] + /// + /// Pushes the input batches to the coalescer and verifies that the resulting + /// batches have the expected number of rows and contents. + #[derive(Debug, Clone, Default)] + struct Test { + /// Batches to feed to the coalescer. Tests must have at least one + /// schema + input_batches: Vec, + /// Expected output sizes of the resulting batches + expected_output_sizes: Vec, + /// target batch size + target_batch_size: usize, + /// Fetch (limit) + fetch: Option, + } - // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 - let batches = &output_partitions[0]; - assert_eq!(3, batches.len()); - assert_eq!(24, batches[0].num_rows()); - assert_eq!(24, batches[1].num_rows()); - assert_eq!(2, batches[2].num_rows()); + impl Test { + fn new() -> Self { + Self::default() + } - Ok(()) - } + /// Set the target batch size + fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { + self.target_batch_size = target_batch_size; + self + } - #[tokio::test] - async fn test_concat_batches_with_fetch_less_than_target_and_no_remaining_rows( - ) -> Result<()> { - let schema = test_schema(); - let partition = create_vec_batches(&schema, 10); - let partitions = vec![partition]; + /// Set the fetch (limit) + fn with_fetch(mut self, fetch: Option) -> Self { + self.fetch = fetch; + self + } - let output_partitions = - coalesce_batches(&schema, partitions, 21, Some(48)).await?; - assert_eq!(1, output_partitions.len()); + /// Extend the input batches with `batch` + fn with_batch(mut self, batch: RecordBatch) -> Self { + self.input_batches.push(batch); + self + } - // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 - let batches = &output_partitions[0]; - assert_eq!(2, batches.len()); - assert_eq!(24, batches[0].num_rows()); - assert_eq!(24, batches[1].num_rows()); + /// Extends the input batches with `batches` + fn with_batches( + mut self, + batches: impl IntoIterator, + ) -> Self { + self.input_batches.extend(batches); + self + } - Ok(()) - } + /// Extends `sizes` to expected output sizes + fn with_expected_output_sizes( + mut self, + sizes: impl IntoIterator, + ) -> Self { + self.expected_output_sizes.extend(sizes); + self + } - #[tokio::test] - async fn test_concat_batches_with_fetch_less_target_batch_size() -> Result<()> { - let schema = test_schema(); - let partition = create_vec_batches(&schema, 10); - let partitions = vec![partition]; + /// Runs the test -- see documentation on [`Test`] for details + fn run(self) { + let Self { + input_batches, + target_batch_size, + fetch, + expected_output_sizes, + } = self; - let output_partitions = - coalesce_batches(&schema, partitions, 21, Some(10)).await?; - assert_eq!(1, output_partitions.len()); + let schema = input_batches[0].schema(); - // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 - let batches = &output_partitions[0]; - assert_eq!(1, batches.len()); - assert_eq!(10, batches[0].num_rows()); + // create a single large input batch for output comparison + let single_input_batch = 
concat_batches(&schema, &input_batches).unwrap(); - Ok(()) - } + let mut coalescer = BatchCoalescer::new(schema, target_batch_size, fetch); - fn test_schema() -> Arc { - Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) - } + let mut output_batches = vec![]; + for batch in input_batches { + if let Some(batch) = coalescer.push_batch(batch).unwrap() { + output_batches.push(batch); + } + } + if let Some(batch) = coalescer.finish().unwrap() { + output_batches.push(batch); + } - async fn coalesce_batches( - schema: &SchemaRef, - input_partitions: Vec>, - target_batch_size: usize, - fetch: Option, - ) -> Result>> { - // create physical plan - let exec = MemoryExec::try_new(&input_partitions, Arc::clone(schema), None)?; - let exec = - RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(1))?; - let exec: Arc = Arc::new( - CoalesceBatchesExec::new(Arc::new(exec), target_batch_size).with_fetch(fetch), - ); - - // execute and collect results - let output_partition_count = exec.output_partitioning().partition_count(); - let mut output_partitions = Vec::with_capacity(output_partition_count); - for i in 0..output_partition_count { - // execute this *output* partition and collect all batches - let task_ctx = Arc::new(TaskContext::default()); - let mut stream = exec.execute(i, Arc::clone(&task_ctx))?; - let mut batches = vec![]; - while let Some(result) = stream.next().await { - batches.push(result?); + // make sure we got the expected number of output batches and content + let mut starting_idx = 0; + assert_eq!(expected_output_sizes.len(), output_batches.len()); + for (i, (expected_size, batch)) in + expected_output_sizes.iter().zip(output_batches).enumerate() + { + assert_eq!( + *expected_size, + batch.num_rows(), + "Unexpected number of rows in Batch {i}" + ); + + // compare the contents of the batch (using `==` compares the + // underlying memory layout too) + let expected_batch = + single_input_batch.slice(starting_idx, *expected_size); + let batch_strings = batch_to_pretty_strings(&batch); + let expected_batch_strings = batch_to_pretty_strings(&expected_batch); + let batch_strings = batch_strings.lines().collect::>(); + let expected_batch_strings = + expected_batch_strings.lines().collect::>(); + assert_eq!( + expected_batch_strings, batch_strings, + "Unexpected content in Batch {i}:\ + \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" + ); + starting_idx += *expected_size; } - output_partitions.push(batches); } - Ok(output_partitions) } - /// Create vector batches - fn create_vec_batches(schema: &Schema, n: usize) -> Vec { - let batch = create_batch(schema); - let mut vec = Vec::with_capacity(n); - for _ in 0..n { - vec.push(batch.clone()); - } - vec - } + /// Return a batch of UInt32 with the specified range + fn uint32_batch(range: Range) -> RecordBatch { + let schema = + Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); - /// Create batch - fn create_batch(schema: &Schema) -> RecordBatch { RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], + Arc::clone(&schema), + vec![Arc::new(UInt32Array::from_iter_values(range))], ) .unwrap() } @@ -656,4 +813,9 @@ mod tests { } } } + fn batch_to_pretty_strings(batch: &RecordBatch) -> String { + arrow::util::pretty::pretty_format_batches(&[batch.clone()]) + .unwrap() + .to_string() + } } From 0332eb569a5428ac385fe892ce7b5fb40d52c8c0 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 2 Aug 2024 20:10:41 
+0800 Subject: [PATCH 202/357] refactor: move ExecutionPlan and related structs into dedicated mod (#11759) Signed-off-by: Ruihang Xia --- .../physical-plan/src/execution_plan.rs | 1018 +++++++++++++++++ datafusion/physical-plan/src/lib.rs | 995 +--------------- 2 files changed, 1030 insertions(+), 983 deletions(-) create mode 100644 datafusion/physical-plan/src/execution_plan.rs diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs new file mode 100644 index 0000000000000..5a3fc086c1f89 --- /dev/null +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -0,0 +1,1018 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::fmt::Debug; +use std::sync::Arc; + +use arrow::datatypes::SchemaRef; +use arrow::record_batch::RecordBatch; +use futures::stream::{StreamExt, TryStreamExt}; +use tokio::task::JoinSet; + +use datafusion_common::config::ConfigOptions; +pub use datafusion_common::hash_utils; +pub use datafusion_common::utils::project_schema; +use datafusion_common::{exec_err, Result}; +pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; +use datafusion_execution::TaskContext; +pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; +pub use datafusion_expr::{Accumulator, ColumnarValue}; +pub use datafusion_physical_expr::window::WindowExpr; +pub use datafusion_physical_expr::{ + expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, +}; +use datafusion_physical_expr::{ + EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, +}; + +use crate::coalesce_partitions::CoalescePartitionsExec; +use crate::display::DisplayableExecutionPlan; +pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; +pub use crate::metrics::Metric; +use crate::metrics::MetricsSet; +pub use crate::ordering::InputOrderMode; +use crate::repartition::RepartitionExec; +use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; +pub use crate::stream::EmptyRecordBatchStream; +use crate::stream::RecordBatchStreamAdapter; + +/// Represent nodes in the DataFusion Physical Plan. +/// +/// Calling [`execute`] produces an `async` [`SendableRecordBatchStream`] of +/// [`RecordBatch`] that incrementally computes a partition of the +/// `ExecutionPlan`'s output from its input. See [`Partitioning`] for more +/// details on partitioning. +/// +/// Methods such as [`Self::schema`] and [`Self::properties`] communicate +/// properties of the output to the DataFusion optimizer, and methods such as +/// [`required_input_distribution`] and [`required_input_ordering`] express +/// requirements of the `ExecutionPlan` from its input. 
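As a consumer-side illustration of that contract (a sketch only, not part of the moved file: it assumes a `plan` built elsewhere and default task state), one partition can be driven to completion like this:

```rust
use std::sync::Arc;
use datafusion_common::Result;
use datafusion_execution::TaskContext;
use datafusion_physical_plan::ExecutionPlan;
use futures::TryStreamExt;

/// Poll one partition of `plan` to completion and count the rows it produced.
async fn run_partition(plan: Arc<dyn ExecutionPlan>, partition: usize) -> Result<usize> {
    let ctx = Arc::new(TaskContext::default());
    // `execute` returns quickly; the real work happens as the stream is polled
    let stream = plan.execute(partition, ctx)?;
    let batches: Vec<_> = stream.try_collect().await?;
    Ok(batches.iter().map(|b| b.num_rows()).sum())
}
```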
+/// +/// [`ExecutionPlan`] can be displayed in a simplified form using the +/// return value from [`displayable`] in addition to the (normally +/// quite verbose) `Debug` output. +/// +/// [`execute`]: ExecutionPlan::execute +/// [`required_input_distribution`]: ExecutionPlan::required_input_distribution +/// [`required_input_ordering`]: ExecutionPlan::required_input_ordering +pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { + /// Short name for the ExecutionPlan, such as 'ParquetExec'. + /// + /// Implementation note: this method can just proxy to + /// [`static_name`](ExecutionPlan::static_name) if no special action is + /// needed. It doesn't provide a default implementation like that because + /// this method doesn't require the `Sized` constrain to allow a wilder + /// range of use cases. + fn name(&self) -> &str; + + /// Short name for the ExecutionPlan, such as 'ParquetExec'. + /// Like [`name`](ExecutionPlan::name) but can be called without an instance. + fn static_name() -> &'static str + where + Self: Sized, + { + let full_name = std::any::type_name::(); + let maybe_start_idx = full_name.rfind(':'); + match maybe_start_idx { + Some(start_idx) => &full_name[start_idx + 1..], + None => "UNKNOWN", + } + } + + /// Returns the execution plan as [`Any`] so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Get the schema for this execution plan + fn schema(&self) -> SchemaRef { + Arc::clone(self.properties().schema()) + } + + /// Return properties of the output of the `ExecutionPlan`, such as output + /// ordering(s), partitioning information etc. + /// + /// This information is available via methods on [`ExecutionPlanProperties`] + /// trait, which is implemented for all `ExecutionPlan`s. + fn properties(&self) -> &PlanProperties; + + /// Specifies the data distribution requirements for all the + /// children for this `ExecutionPlan`, By default it's [[Distribution::UnspecifiedDistribution]] for each child, + fn required_input_distribution(&self) -> Vec { + vec![Distribution::UnspecifiedDistribution; self.children().len()] + } + + /// Specifies the ordering required for all of the children of this + /// `ExecutionPlan`. + /// + /// For each child, it's the local ordering requirement within + /// each partition rather than the global ordering + /// + /// NOTE that checking `!is_empty()` does **not** check for a + /// required input ordering. Instead, the correct check is that at + /// least one entry must be `Some` + fn required_input_ordering(&self) -> Vec>> { + vec![None; self.children().len()] + } + + /// Returns `false` if this `ExecutionPlan`'s implementation may reorder + /// rows within or between partitions. + /// + /// For example, Projection, Filter, and Limit maintain the order + /// of inputs -- they may transform values (Projection) or not + /// produce the same number of rows that went in (Filter and + /// Limit), but the rows that are produced go in the same way. + /// + /// DataFusion uses this metadata to apply certain optimizations + /// such as automatically repartitioning correctly. + /// + /// The default implementation returns `false` + /// + /// WARNING: if you override this default, you *MUST* ensure that + /// the `ExecutionPlan`'s maintains the ordering invariant or else + /// DataFusion may produce incorrect results. 
+ fn maintains_input_order(&self) -> Vec { + vec![false; self.children().len()] + } + + /// Specifies whether the `ExecutionPlan` benefits from increased + /// parallelization at its input for each child. + /// + /// If returns `true`, the `ExecutionPlan` would benefit from partitioning + /// its corresponding child (and thus from more parallelism). For + /// `ExecutionPlan` that do very little work the overhead of extra + /// parallelism may outweigh any benefits + /// + /// The default implementation returns `true` unless this `ExecutionPlan` + /// has signalled it requires a single child input partition. + fn benefits_from_input_partitioning(&self) -> Vec { + // By default try to maximize parallelism with more CPUs if + // possible + self.required_input_distribution() + .into_iter() + .map(|dist| !matches!(dist, Distribution::SinglePartition)) + .collect() + } + + /// Get a list of children `ExecutionPlan`s that act as inputs to this plan. + /// The returned list will be empty for leaf nodes such as scans, will contain + /// a single value for unary nodes, or two values for binary nodes (such as + /// joins). + fn children(&self) -> Vec<&Arc>; + + /// Returns a new `ExecutionPlan` where all existing children were replaced + /// by the `children`, in order + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result>; + + /// If supported, attempt to increase the partitioning of this `ExecutionPlan` to + /// produce `target_partitions` partitions. + /// + /// If the `ExecutionPlan` does not support changing its partitioning, + /// returns `Ok(None)` (the default). + /// + /// It is the `ExecutionPlan` can increase its partitioning, but not to the + /// `target_partitions`, it may return an ExecutionPlan with fewer + /// partitions. This might happen, for example, if each new partition would + /// be too small to be efficiently processed individually. + /// + /// The DataFusion optimizer attempts to use as many threads as possible by + /// repartitioning its inputs to match the target number of threads + /// available (`target_partitions`). Some data sources, such as the built in + /// CSV and Parquet readers, implement this method as they are able to read + /// from their input files in parallel, regardless of how the source data is + /// split amongst files. + fn repartitioned( + &self, + _target_partitions: usize, + _config: &ConfigOptions, + ) -> Result>> { + Ok(None) + } + + /// Begin execution of `partition`, returning a [`Stream`] of + /// [`RecordBatch`]es. + /// + /// # Notes + /// + /// The `execute` method itself is not `async` but it returns an `async` + /// [`futures::stream::Stream`]. This `Stream` should incrementally compute + /// the output, `RecordBatch` by `RecordBatch` (in a streaming fashion). + /// Most `ExecutionPlan`s should not do any work before the first + /// `RecordBatch` is requested from the stream. + /// + /// [`RecordBatchStreamAdapter`] can be used to convert an `async` + /// [`Stream`] into a [`SendableRecordBatchStream`]. + /// + /// Using `async` `Streams` allows for network I/O during execution and + /// takes advantage of Rust's built in support for `async` continuations and + /// crate ecosystem. 
+ /// + /// [`Stream`]: futures::stream::Stream + /// [`StreamExt`]: futures::stream::StreamExt + /// [`TryStreamExt`]: futures::stream::TryStreamExt + /// [`RecordBatchStreamAdapter`]: crate::stream::RecordBatchStreamAdapter + /// + /// # Cancellation / Aborting Execution + /// + /// The [`Stream`] that is returned must ensure that any allocated resources + /// are freed when the stream itself is dropped. This is particularly + /// important for [`spawn`]ed tasks or threads. Unless care is taken to + /// "abort" such tasks, they may continue to consume resources even after + /// the plan is dropped, generating intermediate results that are never + /// used. + /// Thus, [`spawn`] is disallowed, and instead use [`SpawnedTask`]. + /// + /// For more details see [`SpawnedTask`], [`JoinSet`] and [`RecordBatchReceiverStreamBuilder`] + /// for structures to help ensure all background tasks are cancelled. + /// + /// [`spawn`]: tokio::task::spawn + /// [`JoinSet`]: tokio::task::JoinSet + /// [`SpawnedTask`]: datafusion_common_runtime::SpawnedTask + /// [`RecordBatchReceiverStreamBuilder`]: crate::stream::RecordBatchReceiverStreamBuilder + /// + /// # Implementation Examples + /// + /// While `async` `Stream`s have a non trivial learning curve, the + /// [`futures`] crate provides [`StreamExt`] and [`TryStreamExt`] + /// which help simplify many common operations. + /// + /// Here are some common patterns: + /// + /// ## Return Precomputed `RecordBatch` + /// + /// We can return a precomputed `RecordBatch` as a `Stream`: + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// batch: RecordBatch, + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// // use functions from futures crate convert the batch into a stream + /// let fut = futures::future::ready(Ok(self.batch.clone())); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream))) + /// } + /// } + /// ``` + /// + /// ## Lazily (async) Compute `RecordBatch` + /// + /// We can also lazily compute a `RecordBatch` when the returned `Stream` is polled + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// /// Returns a single batch when the returned stream is polled + /// async fn get_batch() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// let fut = get_batch(); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` + /// + /// ## Lazily (async) create a Stream + /// + /// If you need to create the return `Stream` using an `async` function, + /// you can do so by flattening the result: + /// + 
/// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use futures::TryStreamExt; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// /// async function that returns a stream + /// async fn get_batch_stream() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// // A future that yields a stream + /// let fut = get_batch_stream(); + /// // Use TryStreamExt::try_flatten to flatten the stream of streams + /// let stream = futures::stream::once(fut).try_flatten(); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result; + + /// Return a snapshot of the set of [`Metric`]s for this + /// [`ExecutionPlan`]. If no `Metric`s are available, return None. + /// + /// While the values of the metrics in the returned + /// [`MetricsSet`]s may change as execution progresses, the + /// specific metrics will not. + /// + /// Once `self.execute()` has returned (technically the future is + /// resolved) for all available partitions, the set of metrics + /// should be complete. If this function is called prior to + /// `execute()` new metrics may appear in subsequent calls. + fn metrics(&self) -> Option { + None + } + + /// Returns statistics for this `ExecutionPlan` node. If statistics are not + /// available, should return [`Statistics::new_unknown`] (the default), not + /// an error. + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema())) + } + + /// Returns `true` if a limit can be safely pushed down through this + /// `ExecutionPlan` node. + /// + /// If this method returns `true`, and the query plan contains a limit at + /// the output of this node, DataFusion will push the limit to the input + /// of this node. + fn supports_limit_pushdown(&self) -> bool { + false + } + + /// Returns a fetching variant of this `ExecutionPlan` node, if it supports + /// fetch limits. Returns `None` otherwise. + fn with_fetch(&self, _limit: Option) -> Option> { + None + } +} + +/// Extension trait provides an easy API to fetch various properties of +/// [`ExecutionPlan`] objects based on [`ExecutionPlan::properties`]. +pub trait ExecutionPlanProperties { + /// Specifies how the output of this `ExecutionPlan` is split into + /// partitions. + fn output_partitioning(&self) -> &Partitioning; + + /// Specifies whether this plan generates an infinite stream of records. + /// If the plan does not support pipelining, but its input(s) are + /// infinite, returns [`ExecutionMode::PipelineBreaking`] to indicate this. + fn execution_mode(&self) -> ExecutionMode; + + /// If the output of this `ExecutionPlan` within each partition is sorted, + /// returns `Some(keys)` describing the ordering. A `None` return value + /// indicates no assumptions should be made on the output ordering. + /// + /// For example, `SortExec` (obviously) produces sorted output as does + /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted + /// output if its input is sorted as it does not reorder the input rows. 
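+    ///
+    /// As an illustrative sketch (assuming some `plan: &dyn ExecutionPlan` is in
+    /// scope), callers typically consume the reported ordering as:
+    ///
+    /// ```text
+    /// if let Some(sort_exprs) = plan.output_ordering() {
+    ///     // `sort_exprs` is the per-partition sort order, e.g. `[a ASC, b DESC]`
+    /// }
+    /// ```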
+ fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>; + + /// Get the [`EquivalenceProperties`] within the plan. + /// + /// Equivalence properties tell DataFusion what columns are known to be + /// equal, during various optimization passes. By default, this returns "no + /// known equivalences" which is always correct, but may cause DataFusion to + /// unnecessarily resort data. + /// + /// If this ExecutionPlan makes no changes to the schema of the rows flowing + /// through it or how columns within each row relate to each other, it + /// should return the equivalence properties of its input. For + /// example, since `FilterExec` may remove rows from its input, but does not + /// otherwise modify them, it preserves its input equivalence properties. + /// However, since `ProjectionExec` may calculate derived expressions, it + /// needs special handling. + /// + /// See also [`ExecutionPlan::maintains_input_order`] and [`Self::output_ordering`] + /// for related concepts. + fn equivalence_properties(&self) -> &EquivalenceProperties; +} + +impl ExecutionPlanProperties for Arc { + fn output_partitioning(&self) -> &Partitioning { + self.properties().output_partitioning() + } + + fn execution_mode(&self) -> ExecutionMode { + self.properties().execution_mode() + } + + fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + self.properties().output_ordering() + } + + fn equivalence_properties(&self) -> &EquivalenceProperties { + self.properties().equivalence_properties() + } +} + +impl ExecutionPlanProperties for &dyn ExecutionPlan { + fn output_partitioning(&self) -> &Partitioning { + self.properties().output_partitioning() + } + + fn execution_mode(&self) -> ExecutionMode { + self.properties().execution_mode() + } + + fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + self.properties().output_ordering() + } + + fn equivalence_properties(&self) -> &EquivalenceProperties { + self.properties().equivalence_properties() + } +} + +/// Describes the execution mode of an operator's resulting stream with respect +/// to its size and behavior. There are three possible execution modes: `Bounded`, +/// `Unbounded` and `PipelineBreaking`. +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum ExecutionMode { + /// Represents the mode where generated stream is bounded, e.g. finite. + Bounded, + /// Represents the mode where generated stream is unbounded, e.g. infinite. + /// Even though the operator generates an unbounded stream of results, it + /// works with bounded memory and execution can still continue successfully. + /// + /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` + /// will never be done (return `None`), except in case of error. + Unbounded, + /// Represents the mode where some of the operator's input stream(s) are + /// unbounded; however, the operator cannot generate streaming results from + /// these streaming inputs. In this case, the execution mode will be pipeline + /// breaking, e.g. the operator requires unbounded memory to generate results. + PipelineBreaking, +} + +impl ExecutionMode { + /// Check whether the execution mode is unbounded or not. + pub fn is_unbounded(&self) -> bool { + matches!(self, ExecutionMode::Unbounded) + } + + /// Check whether the execution is pipeline friendly. If so, operator can + /// execute safely. 
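+    ///
+    /// As an illustrative sketch (assuming a `child: &Arc<dyn ExecutionPlan>` is
+    /// in scope), a caller might guard streaming-only logic with:
+    ///
+    /// ```text
+    /// if child.execution_mode().pipeline_friendly() {
+    ///     // safe to rely on incremental, bounded-memory execution
+    /// }
+    /// ```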
+ pub fn pipeline_friendly(&self) -> bool { + matches!(self, ExecutionMode::Bounded | ExecutionMode::Unbounded) + } +} + +/// Conservatively "combines" execution modes of a given collection of operators. +pub(crate) fn execution_mode_from_children<'a>( + children: impl IntoIterator>, +) -> ExecutionMode { + let mut result = ExecutionMode::Bounded; + for mode in children.into_iter().map(|child| child.execution_mode()) { + match (mode, result) { + (ExecutionMode::PipelineBreaking, _) + | (_, ExecutionMode::PipelineBreaking) => { + // If any of the modes is `PipelineBreaking`, so is the result: + return ExecutionMode::PipelineBreaking; + } + (ExecutionMode::Unbounded, _) | (_, ExecutionMode::Unbounded) => { + // Unbounded mode eats up bounded mode: + result = ExecutionMode::Unbounded; + } + (ExecutionMode::Bounded, ExecutionMode::Bounded) => { + // When both modes are bounded, so is the result: + result = ExecutionMode::Bounded; + } + } + } + result +} + +/// Stores certain, often expensive to compute, plan properties used in query +/// optimization. +/// +/// These properties are stored a single structure to permit this information to +/// be computed once and then those cached results used multiple times without +/// recomputation (aka a cache) +#[derive(Debug, Clone)] +pub struct PlanProperties { + /// See [ExecutionPlanProperties::equivalence_properties] + pub eq_properties: EquivalenceProperties, + /// See [ExecutionPlanProperties::output_partitioning] + pub partitioning: Partitioning, + /// See [ExecutionPlanProperties::execution_mode] + pub execution_mode: ExecutionMode, + /// See [ExecutionPlanProperties::output_ordering] + output_ordering: Option, +} + +impl PlanProperties { + /// Construct a new `PlanPropertiesCache` from the + pub fn new( + eq_properties: EquivalenceProperties, + partitioning: Partitioning, + execution_mode: ExecutionMode, + ) -> Self { + // Output ordering can be derived from `eq_properties`. + let output_ordering = eq_properties.output_ordering(); + Self { + eq_properties, + partitioning, + execution_mode, + output_ordering, + } + } + + /// Overwrite output partitioning with its new value. + pub fn with_partitioning(mut self, partitioning: Partitioning) -> Self { + self.partitioning = partitioning; + self + } + + /// Overwrite the execution Mode with its new value. + pub fn with_execution_mode(mut self, execution_mode: ExecutionMode) -> Self { + self.execution_mode = execution_mode; + self + } + + /// Overwrite equivalence properties with its new value. + pub fn with_eq_properties(mut self, eq_properties: EquivalenceProperties) -> Self { + // Changing equivalence properties also changes output ordering, so + // make sure to overwrite it: + self.output_ordering = eq_properties.output_ordering(); + self.eq_properties = eq_properties; + self + } + + pub fn equivalence_properties(&self) -> &EquivalenceProperties { + &self.eq_properties + } + + pub fn output_partitioning(&self) -> &Partitioning { + &self.partitioning + } + + pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + self.output_ordering.as_deref() + } + + pub fn execution_mode(&self) -> ExecutionMode { + self.execution_mode + } + + /// Get schema of the node. + fn schema(&self) -> &SchemaRef { + self.eq_properties.schema() + } +} + +/// Indicate whether a data exchange is needed for the input of `plan`, which will be very helpful +/// especially for the distributed engine to judge whether need to deal with shuffling. 
+/// Currently there are 3 kinds of execution plan which needs data exchange +/// 1. RepartitionExec for changing the partition number between two `ExecutionPlan`s +/// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee +/// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee +pub fn need_data_exchange(plan: Arc) -> bool { + if let Some(repartition) = plan.as_any().downcast_ref::() { + !matches!( + repartition.properties().output_partitioning(), + Partitioning::RoundRobinBatch(_) + ) + } else if let Some(coalesce) = plan.as_any().downcast_ref::() + { + coalesce.input().output_partitioning().partition_count() > 1 + } else if let Some(sort_preserving_merge) = + plan.as_any().downcast_ref::() + { + sort_preserving_merge + .input() + .output_partitioning() + .partition_count() + > 1 + } else { + false + } +} + +/// Returns a copy of this plan if we change any child according to the pointer comparison. +/// The size of `children` must be equal to the size of `ExecutionPlan::children()`. +pub fn with_new_children_if_necessary( + plan: Arc, + children: Vec>, +) -> Result> { + let old_children = plan.children(); + if children.len() != old_children.len() { + internal_err!("Wrong number of children") + } else if children.is_empty() + || children + .iter() + .zip(old_children.iter()) + .any(|(c1, c2)| !Arc::ptr_eq(c1, c2)) + { + plan.with_new_children(children) + } else { + Ok(plan) + } +} + +/// Return a [wrapper](DisplayableExecutionPlan) around an +/// [`ExecutionPlan`] which can be displayed in various easier to +/// understand ways. +pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> { + DisplayableExecutionPlan::new(plan) +} + +/// Execute the [ExecutionPlan] and collect the results in memory +pub async fn collect( + plan: Arc, + context: Arc, +) -> Result> { + let stream = execute_stream(plan, context)?; + crate::common::collect(stream).await +} + +/// Execute the [ExecutionPlan] and return a single stream of `RecordBatch`es. +/// +/// See [collect] to buffer the `RecordBatch`es in memory. +/// +/// # Aborting Execution +/// +/// Dropping the stream will abort the execution of the query, and free up +/// any allocated resources +pub fn execute_stream( + plan: Arc, + context: Arc, +) -> Result { + match plan.output_partitioning().partition_count() { + 0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))), + 1 => plan.execute(0, context), + _ => { + // merge into a single partition + let plan = CoalescePartitionsExec::new(Arc::clone(&plan)); + // CoalescePartitionsExec must produce a single partition + assert_eq!(1, plan.properties().output_partitioning().partition_count()); + plan.execute(0, context) + } + } +} + +/// Execute the [ExecutionPlan] and collect the results in memory +pub async fn collect_partitioned( + plan: Arc, + context: Arc, +) -> Result>> { + let streams = execute_stream_partitioned(plan, context)?; + + let mut join_set = JoinSet::new(); + // Execute the plan and collect the results into batches. 
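+    // Using a JoinSet (rather than bare `tokio::spawn`) means the per-partition
+    // tasks are aborted automatically if this future is dropped, so a cancelled
+    // `collect_partitioned` does not leave work running in the background.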
+ streams.into_iter().enumerate().for_each(|(idx, stream)| { + join_set.spawn(async move { + let result: Result> = stream.try_collect().await; + (idx, result) + }); + }); + + let mut batches = vec![]; + // Note that currently this doesn't identify the thread that panicked + // + // TODO: Replace with [join_next_with_id](https://docs.rs/tokio/latest/tokio/task/struct.JoinSet.html#method.join_next_with_id + // once it is stable + while let Some(result) = join_set.join_next().await { + match result { + Ok((idx, res)) => batches.push((idx, res?)), + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } else { + unreachable!(); + } + } + } + } + + batches.sort_by_key(|(idx, _)| *idx); + let batches = batches.into_iter().map(|(_, batch)| batch).collect(); + + Ok(batches) +} + +/// Execute the [ExecutionPlan] and return a vec with one stream per output +/// partition +/// +/// # Aborting Execution +/// +/// Dropping the stream will abort the execution of the query, and free up +/// any allocated resources +pub fn execute_stream_partitioned( + plan: Arc, + context: Arc, +) -> Result> { + let num_partitions = plan.output_partitioning().partition_count(); + let mut streams = Vec::with_capacity(num_partitions); + for i in 0..num_partitions { + streams.push(plan.execute(i, Arc::clone(&context))?); + } + Ok(streams) +} + +/// Executes an input stream and ensures that the resulting stream adheres to +/// the `not null` constraints specified in the `sink_schema`. +/// +/// # Arguments +/// +/// * `input` - An execution plan +/// * `sink_schema` - The schema to be applied to the output stream +/// * `partition` - The partition index to be executed +/// * `context` - The task context +/// +/// # Returns +/// +/// * `Result` - A stream of `RecordBatch`es if successful +/// +/// This function first executes the given input plan for the specified partition +/// and context. It then checks if there are any columns in the input that might +/// violate the `not null` constraints specified in the `sink_schema`. If there are +/// such columns, it wraps the resulting stream to enforce the `not null` constraints +/// by invoking the `check_not_null_contraits` function on each batch of the stream. +pub fn execute_input_stream( + input: Arc, + sink_schema: SchemaRef, + partition: usize, + context: Arc, +) -> Result { + let input_stream = input.execute(partition, context)?; + + debug_assert_eq!(sink_schema.fields().len(), input.schema().fields().len()); + + // Find input columns that may violate the not null constraint. + let risky_columns: Vec<_> = sink_schema + .fields() + .iter() + .zip(input.schema().fields().iter()) + .enumerate() + .filter_map(|(idx, (sink_field, input_field))| { + (!sink_field.is_nullable() && input_field.is_nullable()).then_some(idx) + }) + .collect(); + + if risky_columns.is_empty() { + Ok(input_stream) + } else { + // Check not null constraint on the input stream + Ok(Box::pin(RecordBatchStreamAdapter::new( + sink_schema, + input_stream + .map(move |batch| check_not_null_contraits(batch?, &risky_columns)), + ))) + } +} + +/// Checks a `RecordBatch` for `not null` constraints on specified columns. +/// +/// # Arguments +/// +/// * `batch` - The `RecordBatch` to be checked +/// * `column_indices` - A vector of column indices that should be checked for +/// `not null` constraints. 
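+///
+/// As a usage sketch (assuming a `batch: RecordBatch` whose column `0` must not
+/// contain nulls):
+///
+/// ```text
+/// let checked = check_not_null_contraits(batch, &vec![0])?;
+/// ```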
+/// +/// # Returns +/// +/// * `Result` - The original `RecordBatch` if all constraints are met +/// +/// This function iterates over the specified column indices and ensures that none +/// of the columns contain null values. If any column contains null values, an error +/// is returned. +pub fn check_not_null_contraits( + batch: RecordBatch, + column_indices: &Vec, +) -> Result { + for &index in column_indices { + if batch.num_columns() <= index { + return exec_err!( + "Invalid batch column count {} expected > {}", + batch.num_columns(), + index + ); + } + + if batch.column(index).null_count() > 0 { + return exec_err!( + "Invalid batch column at '{}' has null but schema specifies non-nullable", + index + ); + } + } + + Ok(batch) +} + +/// Utility function yielding a string representation of the given [`ExecutionPlan`]. +pub fn get_plan_string(plan: &Arc) -> Vec { + let formatted = displayable(plan.as_ref()).indent(true).to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + actual.iter().map(|elem| elem.to_string()).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::any::Any; + use std::sync::Arc; + + use arrow_schema::{Schema, SchemaRef}; + + use datafusion_common::{Result, Statistics}; + use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + + use crate::{DisplayAs, DisplayFormatType, ExecutionPlan}; + + #[derive(Debug)] + pub struct EmptyExec; + + impl EmptyExec { + pub fn new(_schema: SchemaRef) -> Self { + Self + } + } + + impl DisplayAs for EmptyExec { + fn fmt_as( + &self, + _t: DisplayFormatType, + _f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + unimplemented!() + } + } + + impl ExecutionPlan for EmptyExec { + fn name(&self) -> &'static str { + Self::static_name() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + unimplemented!() + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + unimplemented!() + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!() + } + + fn statistics(&self) -> Result { + unimplemented!() + } + } + + #[derive(Debug)] + pub struct RenamedEmptyExec; + + impl RenamedEmptyExec { + pub fn new(_schema: SchemaRef) -> Self { + Self + } + } + + impl DisplayAs for RenamedEmptyExec { + fn fmt_as( + &self, + _t: DisplayFormatType, + _f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + unimplemented!() + } + } + + impl ExecutionPlan for RenamedEmptyExec { + fn name(&self) -> &'static str { + Self::static_name() + } + + fn static_name() -> &'static str + where + Self: Sized, + { + "MyRenamedEmptyExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + unimplemented!() + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + unimplemented!() + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!() + } + + fn statistics(&self) -> Result { + unimplemented!() + } + } + + #[test] + fn test_execution_plan_name() { + let schema1 = Arc::new(Schema::empty()); + let default_name_exec = EmptyExec::new(schema1); + assert_eq!(default_name_exec.name(), "EmptyExec"); + + let schema2 = Arc::new(Schema::empty()); + let renamed_exec = RenamedEmptyExec::new(schema2); + assert_eq!(renamed_exec.name(), "MyRenamedEmptyExec"); + assert_eq!(RenamedEmptyExec::static_name(), "MyRenamedEmptyExec"); + } + + /// 
A compilation test to ensure that the `ExecutionPlan::name()` method can + /// be called from a trait object. + /// Related ticket: https://github.com/apache/datafusion/pull/11047 + #[allow(dead_code)] + fn use_execution_plan_as_trait_object(plan: &dyn ExecutionPlan) { + let _ = plan.name(); + } +} + +// pub mod test; diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 19554d07f7a0f..eeecc017c2afa 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -14,46 +14,36 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + // Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 + #![deny(clippy::clone_on_ref_ptr)] //! Traits for physical query plan, supporting parallel execution for partitioned relations. +//! +//! Entrypoint of this crate is trait [ExecutionPlan]. -use std::any::Any; -use std::fmt::Debug; -use std::sync::Arc; - -use arrow::datatypes::SchemaRef; -use arrow::record_batch::RecordBatch; -use futures::stream::{StreamExt, TryStreamExt}; -use tokio::task::JoinSet; - -use datafusion_common::config::ConfigOptions; pub use datafusion_common::hash_utils; pub use datafusion_common::utils::project_schema; -use datafusion_common::{exec_err, Result}; pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; -use datafusion_execution::TaskContext; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; +use datafusion_physical_expr::PhysicalSortExpr; pub use datafusion_physical_expr::{ expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, }; -use datafusion_physical_expr::{ - EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, -}; -use crate::coalesce_partitions::CoalescePartitionsExec; -use crate::display::DisplayableExecutionPlan; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; +pub(crate) use crate::execution_plan::execution_mode_from_children; +pub use crate::execution_plan::{ + collect, collect_partitioned, displayable, execute_input_stream, execute_stream, + execute_stream_partitioned, get_plan_string, with_new_children_if_necessary, + ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, +}; pub use crate::metrics::Metric; -use crate::metrics::MetricsSet; pub use crate::ordering::InputOrderMode; -use crate::repartition::RepartitionExec; -use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; pub use crate::stream::EmptyRecordBatchStream; -use crate::stream::RecordBatchStreamAdapter; pub use crate::topk::TopK; pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor}; @@ -68,6 +58,7 @@ pub mod coalesce_partitions; pub mod common; pub mod display; pub mod empty; +pub mod execution_plan; pub mod explain; pub mod filter; pub mod insert; @@ -96,967 +87,5 @@ pub mod udaf { }; } -/// Represent nodes in the DataFusion Physical Plan. -/// -/// Calling [`execute`] produces an `async` [`SendableRecordBatchStream`] of -/// [`RecordBatch`] that incrementally computes a partition of the -/// `ExecutionPlan`'s output from its input. See [`Partitioning`] for more -/// details on partitioning. 
-/// -/// Methods such as [`Self::schema`] and [`Self::properties`] communicate -/// properties of the output to the DataFusion optimizer, and methods such as -/// [`required_input_distribution`] and [`required_input_ordering`] express -/// requirements of the `ExecutionPlan` from its input. -/// -/// [`ExecutionPlan`] can be displayed in a simplified form using the -/// return value from [`displayable`] in addition to the (normally -/// quite verbose) `Debug` output. -/// -/// [`execute`]: ExecutionPlan::execute -/// [`required_input_distribution`]: ExecutionPlan::required_input_distribution -/// [`required_input_ordering`]: ExecutionPlan::required_input_ordering -pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { - /// Short name for the ExecutionPlan, such as 'ParquetExec'. - /// - /// Implementation note: this method can just proxy to - /// [`static_name`](ExecutionPlan::static_name) if no special action is - /// needed. It doesn't provide a default implementation like that because - /// this method doesn't require the `Sized` constrain to allow a wilder - /// range of use cases. - fn name(&self) -> &str; - - /// Short name for the ExecutionPlan, such as 'ParquetExec'. - /// Like [`name`](ExecutionPlan::name) but can be called without an instance. - fn static_name() -> &'static str - where - Self: Sized, - { - let full_name = std::any::type_name::(); - let maybe_start_idx = full_name.rfind(':'); - match maybe_start_idx { - Some(start_idx) => &full_name[start_idx + 1..], - None => "UNKNOWN", - } - } - - /// Returns the execution plan as [`Any`] so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// Get the schema for this execution plan - fn schema(&self) -> SchemaRef { - Arc::clone(self.properties().schema()) - } - - /// Return properties of the output of the `ExecutionPlan`, such as output - /// ordering(s), partitioning information etc. - /// - /// This information is available via methods on [`ExecutionPlanProperties`] - /// trait, which is implemented for all `ExecutionPlan`s. - fn properties(&self) -> &PlanProperties; - - /// Specifies the data distribution requirements for all the - /// children for this `ExecutionPlan`, By default it's [[Distribution::UnspecifiedDistribution]] for each child, - fn required_input_distribution(&self) -> Vec { - vec![Distribution::UnspecifiedDistribution; self.children().len()] - } - - /// Specifies the ordering required for all of the children of this - /// `ExecutionPlan`. - /// - /// For each child, it's the local ordering requirement within - /// each partition rather than the global ordering - /// - /// NOTE that checking `!is_empty()` does **not** check for a - /// required input ordering. Instead, the correct check is that at - /// least one entry must be `Some` - fn required_input_ordering(&self) -> Vec>> { - vec![None; self.children().len()] - } - - /// Returns `false` if this `ExecutionPlan`'s implementation may reorder - /// rows within or between partitions. - /// - /// For example, Projection, Filter, and Limit maintain the order - /// of inputs -- they may transform values (Projection) or not - /// produce the same number of rows that went in (Filter and - /// Limit), but the rows that are produced go in the same way. - /// - /// DataFusion uses this metadata to apply certain optimizations - /// such as automatically repartitioning correctly. 
- /// - /// The default implementation returns `false` - /// - /// WARNING: if you override this default, you *MUST* ensure that - /// the `ExecutionPlan`'s maintains the ordering invariant or else - /// DataFusion may produce incorrect results. - fn maintains_input_order(&self) -> Vec { - vec![false; self.children().len()] - } - - /// Specifies whether the `ExecutionPlan` benefits from increased - /// parallelization at its input for each child. - /// - /// If returns `true`, the `ExecutionPlan` would benefit from partitioning - /// its corresponding child (and thus from more parallelism). For - /// `ExecutionPlan` that do very little work the overhead of extra - /// parallelism may outweigh any benefits - /// - /// The default implementation returns `true` unless this `ExecutionPlan` - /// has signalled it requires a single child input partition. - fn benefits_from_input_partitioning(&self) -> Vec { - // By default try to maximize parallelism with more CPUs if - // possible - self.required_input_distribution() - .into_iter() - .map(|dist| !matches!(dist, Distribution::SinglePartition)) - .collect() - } - - /// Get a list of children `ExecutionPlan`s that act as inputs to this plan. - /// The returned list will be empty for leaf nodes such as scans, will contain - /// a single value for unary nodes, or two values for binary nodes (such as - /// joins). - fn children(&self) -> Vec<&Arc>; - - /// Returns a new `ExecutionPlan` where all existing children were replaced - /// by the `children`, in order - fn with_new_children( - self: Arc, - children: Vec>, - ) -> Result>; - - /// If supported, attempt to increase the partitioning of this `ExecutionPlan` to - /// produce `target_partitions` partitions. - /// - /// If the `ExecutionPlan` does not support changing its partitioning, - /// returns `Ok(None)` (the default). - /// - /// It is the `ExecutionPlan` can increase its partitioning, but not to the - /// `target_partitions`, it may return an ExecutionPlan with fewer - /// partitions. This might happen, for example, if each new partition would - /// be too small to be efficiently processed individually. - /// - /// The DataFusion optimizer attempts to use as many threads as possible by - /// repartitioning its inputs to match the target number of threads - /// available (`target_partitions`). Some data sources, such as the built in - /// CSV and Parquet readers, implement this method as they are able to read - /// from their input files in parallel, regardless of how the source data is - /// split amongst files. - fn repartitioned( - &self, - _target_partitions: usize, - _config: &ConfigOptions, - ) -> Result>> { - Ok(None) - } - - /// Begin execution of `partition`, returning a [`Stream`] of - /// [`RecordBatch`]es. - /// - /// # Notes - /// - /// The `execute` method itself is not `async` but it returns an `async` - /// [`futures::stream::Stream`]. This `Stream` should incrementally compute - /// the output, `RecordBatch` by `RecordBatch` (in a streaming fashion). - /// Most `ExecutionPlan`s should not do any work before the first - /// `RecordBatch` is requested from the stream. - /// - /// [`RecordBatchStreamAdapter`] can be used to convert an `async` - /// [`Stream`] into a [`SendableRecordBatchStream`]. - /// - /// Using `async` `Streams` allows for network I/O during execution and - /// takes advantage of Rust's built in support for `async` continuations and - /// crate ecosystem. 
- /// - /// [`Stream`]: futures::stream::Stream - /// [`StreamExt`]: futures::stream::StreamExt - /// [`TryStreamExt`]: futures::stream::TryStreamExt - /// [`RecordBatchStreamAdapter`]: crate::stream::RecordBatchStreamAdapter - /// - /// # Cancellation / Aborting Execution - /// - /// The [`Stream`] that is returned must ensure that any allocated resources - /// are freed when the stream itself is dropped. This is particularly - /// important for [`spawn`]ed tasks or threads. Unless care is taken to - /// "abort" such tasks, they may continue to consume resources even after - /// the plan is dropped, generating intermediate results that are never - /// used. - /// Thus, [`spawn`] is disallowed, and instead use [`SpawnedTask`]. - /// - /// For more details see [`SpawnedTask`], [`JoinSet`] and [`RecordBatchReceiverStreamBuilder`] - /// for structures to help ensure all background tasks are cancelled. - /// - /// [`spawn`]: tokio::task::spawn - /// [`JoinSet`]: tokio::task::JoinSet - /// [`SpawnedTask`]: datafusion_common_runtime::SpawnedTask - /// [`RecordBatchReceiverStreamBuilder`]: crate::stream::RecordBatchReceiverStreamBuilder - /// - /// # Implementation Examples - /// - /// While `async` `Stream`s have a non trivial learning curve, the - /// [`futures`] crate provides [`StreamExt`] and [`TryStreamExt`] - /// which help simplify many common operations. - /// - /// Here are some common patterns: - /// - /// ## Return Precomputed `RecordBatch` - /// - /// We can return a precomputed `RecordBatch` as a `Stream`: - /// - /// ``` - /// # use std::sync::Arc; - /// # use arrow_array::RecordBatch; - /// # use arrow_schema::SchemaRef; - /// # use datafusion_common::Result; - /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - /// # use datafusion_physical_plan::memory::MemoryStream; - /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - /// struct MyPlan { - /// batch: RecordBatch, - /// } - /// - /// impl MyPlan { - /// fn execute( - /// &self, - /// partition: usize, - /// context: Arc - /// ) -> Result { - /// // use functions from futures crate convert the batch into a stream - /// let fut = futures::future::ready(Ok(self.batch.clone())); - /// let stream = futures::stream::once(fut); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream))) - /// } - /// } - /// ``` - /// - /// ## Lazily (async) Compute `RecordBatch` - /// - /// We can also lazily compute a `RecordBatch` when the returned `Stream` is polled - /// - /// ``` - /// # use std::sync::Arc; - /// # use arrow_array::RecordBatch; - /// # use arrow_schema::SchemaRef; - /// # use datafusion_common::Result; - /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - /// # use datafusion_physical_plan::memory::MemoryStream; - /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - /// struct MyPlan { - /// schema: SchemaRef, - /// } - /// - /// /// Returns a single batch when the returned stream is polled - /// async fn get_batch() -> Result { - /// todo!() - /// } - /// - /// impl MyPlan { - /// fn execute( - /// &self, - /// partition: usize, - /// context: Arc - /// ) -> Result { - /// let fut = get_batch(); - /// let stream = futures::stream::once(fut); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) - /// } - /// } - /// ``` - /// - /// ## Lazily (async) create a Stream - /// - /// If you need to create the return `Stream` using an `async` function, - /// you can do so by flattening the result: - /// - 
/// ``` - /// # use std::sync::Arc; - /// # use arrow_array::RecordBatch; - /// # use arrow_schema::SchemaRef; - /// # use futures::TryStreamExt; - /// # use datafusion_common::Result; - /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - /// # use datafusion_physical_plan::memory::MemoryStream; - /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - /// struct MyPlan { - /// schema: SchemaRef, - /// } - /// - /// /// async function that returns a stream - /// async fn get_batch_stream() -> Result { - /// todo!() - /// } - /// - /// impl MyPlan { - /// fn execute( - /// &self, - /// partition: usize, - /// context: Arc - /// ) -> Result { - /// // A future that yields a stream - /// let fut = get_batch_stream(); - /// // Use TryStreamExt::try_flatten to flatten the stream of streams - /// let stream = futures::stream::once(fut).try_flatten(); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) - /// } - /// } - /// ``` - fn execute( - &self, - partition: usize, - context: Arc, - ) -> Result; - - /// Return a snapshot of the set of [`Metric`]s for this - /// [`ExecutionPlan`]. If no `Metric`s are available, return None. - /// - /// While the values of the metrics in the returned - /// [`MetricsSet`]s may change as execution progresses, the - /// specific metrics will not. - /// - /// Once `self.execute()` has returned (technically the future is - /// resolved) for all available partitions, the set of metrics - /// should be complete. If this function is called prior to - /// `execute()` new metrics may appear in subsequent calls. - fn metrics(&self) -> Option { - None - } - - /// Returns statistics for this `ExecutionPlan` node. If statistics are not - /// available, should return [`Statistics::new_unknown`] (the default), not - /// an error. - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } - - /// Returns `true` if a limit can be safely pushed down through this - /// `ExecutionPlan` node. - /// - /// If this method returns `true`, and the query plan contains a limit at - /// the output of this node, DataFusion will push the limit to the input - /// of this node. - fn supports_limit_pushdown(&self) -> bool { - false - } - - /// Returns a fetching variant of this `ExecutionPlan` node, if it supports - /// fetch limits. Returns `None` otherwise. - fn with_fetch(&self, _limit: Option) -> Option> { - None - } -} - -/// Extension trait provides an easy API to fetch various properties of -/// [`ExecutionPlan`] objects based on [`ExecutionPlan::properties`]. -pub trait ExecutionPlanProperties { - /// Specifies how the output of this `ExecutionPlan` is split into - /// partitions. - fn output_partitioning(&self) -> &Partitioning; - - /// Specifies whether this plan generates an infinite stream of records. - /// If the plan does not support pipelining, but its input(s) are - /// infinite, returns [`ExecutionMode::PipelineBreaking`] to indicate this. - fn execution_mode(&self) -> ExecutionMode; - - /// If the output of this `ExecutionPlan` within each partition is sorted, - /// returns `Some(keys)` describing the ordering. A `None` return value - /// indicates no assumptions should be made on the output ordering. - /// - /// For example, `SortExec` (obviously) produces sorted output as does - /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted - /// output if its input is sorted as it does not reorder the input rows. 
- fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>; - - /// Get the [`EquivalenceProperties`] within the plan. - /// - /// Equivalence properties tell DataFusion what columns are known to be - /// equal, during various optimization passes. By default, this returns "no - /// known equivalences" which is always correct, but may cause DataFusion to - /// unnecessarily resort data. - /// - /// If this ExecutionPlan makes no changes to the schema of the rows flowing - /// through it or how columns within each row relate to each other, it - /// should return the equivalence properties of its input. For - /// example, since `FilterExec` may remove rows from its input, but does not - /// otherwise modify them, it preserves its input equivalence properties. - /// However, since `ProjectionExec` may calculate derived expressions, it - /// needs special handling. - /// - /// See also [`ExecutionPlan::maintains_input_order`] and [`Self::output_ordering`] - /// for related concepts. - fn equivalence_properties(&self) -> &EquivalenceProperties; -} - -impl ExecutionPlanProperties for Arc { - fn output_partitioning(&self) -> &Partitioning { - self.properties().output_partitioning() - } - - fn execution_mode(&self) -> ExecutionMode { - self.properties().execution_mode() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.properties().output_ordering() - } - - fn equivalence_properties(&self) -> &EquivalenceProperties { - self.properties().equivalence_properties() - } -} - -impl ExecutionPlanProperties for &dyn ExecutionPlan { - fn output_partitioning(&self) -> &Partitioning { - self.properties().output_partitioning() - } - - fn execution_mode(&self) -> ExecutionMode { - self.properties().execution_mode() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.properties().output_ordering() - } - - fn equivalence_properties(&self) -> &EquivalenceProperties { - self.properties().equivalence_properties() - } -} - -/// Describes the execution mode of an operator's resulting stream with respect -/// to its size and behavior. There are three possible execution modes: `Bounded`, -/// `Unbounded` and `PipelineBreaking`. -#[derive(Clone, Copy, PartialEq, Debug)] -pub enum ExecutionMode { - /// Represents the mode where generated stream is bounded, e.g. finite. - Bounded, - /// Represents the mode where generated stream is unbounded, e.g. infinite. - /// Even though the operator generates an unbounded stream of results, it - /// works with bounded memory and execution can still continue successfully. - /// - /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` - /// will never be done (return `None`), except in case of error. - Unbounded, - /// Represents the mode where some of the operator's input stream(s) are - /// unbounded; however, the operator cannot generate streaming results from - /// these streaming inputs. In this case, the execution mode will be pipeline - /// breaking, e.g. the operator requires unbounded memory to generate results. - PipelineBreaking, -} - -impl ExecutionMode { - /// Check whether the execution mode is unbounded or not. - pub fn is_unbounded(&self) -> bool { - matches!(self, ExecutionMode::Unbounded) - } - - /// Check whether the execution is pipeline friendly. If so, operator can - /// execute safely. 
- pub fn pipeline_friendly(&self) -> bool { - matches!(self, ExecutionMode::Bounded | ExecutionMode::Unbounded) - } -} - -/// Conservatively "combines" execution modes of a given collection of operators. -fn execution_mode_from_children<'a>( - children: impl IntoIterator>, -) -> ExecutionMode { - let mut result = ExecutionMode::Bounded; - for mode in children.into_iter().map(|child| child.execution_mode()) { - match (mode, result) { - (ExecutionMode::PipelineBreaking, _) - | (_, ExecutionMode::PipelineBreaking) => { - // If any of the modes is `PipelineBreaking`, so is the result: - return ExecutionMode::PipelineBreaking; - } - (ExecutionMode::Unbounded, _) | (_, ExecutionMode::Unbounded) => { - // Unbounded mode eats up bounded mode: - result = ExecutionMode::Unbounded; - } - (ExecutionMode::Bounded, ExecutionMode::Bounded) => { - // When both modes are bounded, so is the result: - result = ExecutionMode::Bounded; - } - } - } - result -} - -/// Stores certain, often expensive to compute, plan properties used in query -/// optimization. -/// -/// These properties are stored a single structure to permit this information to -/// be computed once and then those cached results used multiple times without -/// recomputation (aka a cache) -#[derive(Debug, Clone)] -pub struct PlanProperties { - /// See [ExecutionPlanProperties::equivalence_properties] - pub eq_properties: EquivalenceProperties, - /// See [ExecutionPlanProperties::output_partitioning] - pub partitioning: Partitioning, - /// See [ExecutionPlanProperties::execution_mode] - pub execution_mode: ExecutionMode, - /// See [ExecutionPlanProperties::output_ordering] - output_ordering: Option, -} - -impl PlanProperties { - /// Construct a new `PlanPropertiesCache` from the - pub fn new( - eq_properties: EquivalenceProperties, - partitioning: Partitioning, - execution_mode: ExecutionMode, - ) -> Self { - // Output ordering can be derived from `eq_properties`. - let output_ordering = eq_properties.output_ordering(); - Self { - eq_properties, - partitioning, - execution_mode, - output_ordering, - } - } - - /// Overwrite output partitioning with its new value. - pub fn with_partitioning(mut self, partitioning: Partitioning) -> Self { - self.partitioning = partitioning; - self - } - - /// Overwrite the execution Mode with its new value. - pub fn with_execution_mode(mut self, execution_mode: ExecutionMode) -> Self { - self.execution_mode = execution_mode; - self - } - - /// Overwrite equivalence properties with its new value. - pub fn with_eq_properties(mut self, eq_properties: EquivalenceProperties) -> Self { - // Changing equivalence properties also changes output ordering, so - // make sure to overwrite it: - self.output_ordering = eq_properties.output_ordering(); - self.eq_properties = eq_properties; - self - } - - pub fn equivalence_properties(&self) -> &EquivalenceProperties { - &self.eq_properties - } - - pub fn output_partitioning(&self) -> &Partitioning { - &self.partitioning - } - - pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.output_ordering.as_deref() - } - - pub fn execution_mode(&self) -> ExecutionMode { - self.execution_mode - } - - /// Get schema of the node. - fn schema(&self) -> &SchemaRef { - self.eq_properties.schema() - } -} - -/// Indicate whether a data exchange is needed for the input of `plan`, which will be very helpful -/// especially for the distributed engine to judge whether need to deal with shuffling. -/// Currently there are 3 kinds of execution plan which needs data exchange -/// 1. 
RepartitionExec for changing the partition number between two `ExecutionPlan`s -/// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee -/// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee -pub fn need_data_exchange(plan: Arc) -> bool { - if let Some(repartition) = plan.as_any().downcast_ref::() { - !matches!( - repartition.properties().output_partitioning(), - Partitioning::RoundRobinBatch(_) - ) - } else if let Some(coalesce) = plan.as_any().downcast_ref::() - { - coalesce.input().output_partitioning().partition_count() > 1 - } else if let Some(sort_preserving_merge) = - plan.as_any().downcast_ref::() - { - sort_preserving_merge - .input() - .output_partitioning() - .partition_count() - > 1 - } else { - false - } -} - -/// Returns a copy of this plan if we change any child according to the pointer comparison. -/// The size of `children` must be equal to the size of `ExecutionPlan::children()`. -pub fn with_new_children_if_necessary( - plan: Arc, - children: Vec>, -) -> Result> { - let old_children = plan.children(); - if children.len() != old_children.len() { - internal_err!("Wrong number of children") - } else if children.is_empty() - || children - .iter() - .zip(old_children.iter()) - .any(|(c1, c2)| !Arc::ptr_eq(c1, c2)) - { - plan.with_new_children(children) - } else { - Ok(plan) - } -} - -/// Return a [wrapper](DisplayableExecutionPlan) around an -/// [`ExecutionPlan`] which can be displayed in various easier to -/// understand ways. -pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> { - DisplayableExecutionPlan::new(plan) -} - -/// Execute the [ExecutionPlan] and collect the results in memory -pub async fn collect( - plan: Arc, - context: Arc, -) -> Result> { - let stream = execute_stream(plan, context)?; - common::collect(stream).await -} - -/// Execute the [ExecutionPlan] and return a single stream of `RecordBatch`es. -/// -/// See [collect] to buffer the `RecordBatch`es in memory. -/// -/// # Aborting Execution -/// -/// Dropping the stream will abort the execution of the query, and free up -/// any allocated resources -pub fn execute_stream( - plan: Arc, - context: Arc, -) -> Result { - match plan.output_partitioning().partition_count() { - 0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))), - 1 => plan.execute(0, context), - _ => { - // merge into a single partition - let plan = CoalescePartitionsExec::new(Arc::clone(&plan)); - // CoalescePartitionsExec must produce a single partition - assert_eq!(1, plan.properties().output_partitioning().partition_count()); - plan.execute(0, context) - } - } -} - -/// Execute the [ExecutionPlan] and collect the results in memory -pub async fn collect_partitioned( - plan: Arc, - context: Arc, -) -> Result>> { - let streams = execute_stream_partitioned(plan, context)?; - - let mut join_set = JoinSet::new(); - // Execute the plan and collect the results into batches. 
- streams.into_iter().enumerate().for_each(|(idx, stream)| { - join_set.spawn(async move { - let result: Result> = stream.try_collect().await; - (idx, result) - }); - }); - - let mut batches = vec![]; - // Note that currently this doesn't identify the thread that panicked - // - // TODO: Replace with [join_next_with_id](https://docs.rs/tokio/latest/tokio/task/struct.JoinSet.html#method.join_next_with_id - // once it is stable - while let Some(result) = join_set.join_next().await { - match result { - Ok((idx, res)) => batches.push((idx, res?)), - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } else { - unreachable!(); - } - } - } - } - - batches.sort_by_key(|(idx, _)| *idx); - let batches = batches.into_iter().map(|(_, batch)| batch).collect(); - - Ok(batches) -} - -/// Execute the [ExecutionPlan] and return a vec with one stream per output -/// partition -/// -/// # Aborting Execution -/// -/// Dropping the stream will abort the execution of the query, and free up -/// any allocated resources -pub fn execute_stream_partitioned( - plan: Arc, - context: Arc, -) -> Result> { - let num_partitions = plan.output_partitioning().partition_count(); - let mut streams = Vec::with_capacity(num_partitions); - for i in 0..num_partitions { - streams.push(plan.execute(i, Arc::clone(&context))?); - } - Ok(streams) -} - -/// Executes an input stream and ensures that the resulting stream adheres to -/// the `not null` constraints specified in the `sink_schema`. -/// -/// # Arguments -/// -/// * `input` - An execution plan -/// * `sink_schema` - The schema to be applied to the output stream -/// * `partition` - The partition index to be executed -/// * `context` - The task context -/// -/// # Returns -/// -/// * `Result` - A stream of `RecordBatch`es if successful -/// -/// This function first executes the given input plan for the specified partition -/// and context. It then checks if there are any columns in the input that might -/// violate the `not null` constraints specified in the `sink_schema`. If there are -/// such columns, it wraps the resulting stream to enforce the `not null` constraints -/// by invoking the `check_not_null_contraits` function on each batch of the stream. -pub fn execute_input_stream( - input: Arc, - sink_schema: SchemaRef, - partition: usize, - context: Arc, -) -> Result { - let input_stream = input.execute(partition, context)?; - - debug_assert_eq!(sink_schema.fields().len(), input.schema().fields().len()); - - // Find input columns that may violate the not null constraint. - let risky_columns: Vec<_> = sink_schema - .fields() - .iter() - .zip(input.schema().fields().iter()) - .enumerate() - .filter_map(|(idx, (sink_field, input_field))| { - (!sink_field.is_nullable() && input_field.is_nullable()).then_some(idx) - }) - .collect(); - - if risky_columns.is_empty() { - Ok(input_stream) - } else { - // Check not null constraint on the input stream - Ok(Box::pin(RecordBatchStreamAdapter::new( - sink_schema, - input_stream - .map(move |batch| check_not_null_contraits(batch?, &risky_columns)), - ))) - } -} - -/// Checks a `RecordBatch` for `not null` constraints on specified columns. -/// -/// # Arguments -/// -/// * `batch` - The `RecordBatch` to be checked -/// * `column_indices` - A vector of column indices that should be checked for -/// `not null` constraints. 
-/// -/// # Returns -/// -/// * `Result` - The original `RecordBatch` if all constraints are met -/// -/// This function iterates over the specified column indices and ensures that none -/// of the columns contain null values. If any column contains null values, an error -/// is returned. -pub fn check_not_null_contraits( - batch: RecordBatch, - column_indices: &Vec, -) -> Result { - for &index in column_indices { - if batch.num_columns() <= index { - return exec_err!( - "Invalid batch column count {} expected > {}", - batch.num_columns(), - index - ); - } - - if batch.column(index).null_count() > 0 { - return exec_err!( - "Invalid batch column at '{}' has null but schema specifies non-nullable", - index - ); - } - } - - Ok(batch) -} - -/// Utility function yielding a string representation of the given [`ExecutionPlan`]. -pub fn get_plan_string(plan: &Arc) -> Vec { - let formatted = displayable(plan.as_ref()).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - actual.iter().map(|elem| elem.to_string()).collect() -} - #[cfg(test)] -mod tests { - use std::any::Any; - use std::sync::Arc; - - use arrow_schema::{Schema, SchemaRef}; - - use datafusion_common::{Result, Statistics}; - use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - - use crate::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; - - #[derive(Debug)] - pub struct EmptyExec; - - impl EmptyExec { - pub fn new(_schema: SchemaRef) -> Self { - Self - } - } - - impl DisplayAs for EmptyExec { - fn fmt_as( - &self, - _t: DisplayFormatType, - _f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - unimplemented!() - } - } - - impl ExecutionPlan for EmptyExec { - fn name(&self) -> &'static str { - Self::static_name() - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn properties(&self) -> &PlanProperties { - unimplemented!() - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> Result> { - unimplemented!() - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - unimplemented!() - } - - fn statistics(&self) -> Result { - unimplemented!() - } - } - - #[derive(Debug)] - pub struct RenamedEmptyExec; - - impl RenamedEmptyExec { - pub fn new(_schema: SchemaRef) -> Self { - Self - } - } - - impl DisplayAs for RenamedEmptyExec { - fn fmt_as( - &self, - _t: DisplayFormatType, - _f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - unimplemented!() - } - } - - impl ExecutionPlan for RenamedEmptyExec { - fn name(&self) -> &'static str { - Self::static_name() - } - - fn static_name() -> &'static str - where - Self: Sized, - { - "MyRenamedEmptyExec" - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn properties(&self) -> &PlanProperties { - unimplemented!() - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> Result> { - unimplemented!() - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - unimplemented!() - } - - fn statistics(&self) -> Result { - unimplemented!() - } - } - - #[test] - fn test_execution_plan_name() { - let schema1 = Arc::new(Schema::empty()); - let default_name_exec = EmptyExec::new(schema1); - assert_eq!(default_name_exec.name(), "EmptyExec"); - - let schema2 = Arc::new(Schema::empty()); - let renamed_exec = RenamedEmptyExec::new(schema2); - assert_eq!(renamed_exec.name(), "MyRenamedEmptyExec"); - assert_eq!(RenamedEmptyExec::static_name(), "MyRenamedEmptyExec"); - } - - /// 
A compilation test to ensure that the `ExecutionPlan::name()` method can - /// be called from a trait object. - /// Related ticket: https://github.com/apache/datafusion/pull/11047 - #[allow(dead_code)] - fn use_execution_plan_as_trait_object(plan: &dyn ExecutionPlan) { - let _ = plan.name(); - } -} - pub mod test; From b89037f0d4924f42712d9940dfa3fc5a66903f89 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 2 Aug 2024 20:05:37 +0200 Subject: [PATCH 203/357] Add references to github issue (#11784) --- datafusion/core/src/datasource/listing/table.rs | 4 ++-- datafusion/core/src/datasource/listing_table_factory.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 72c6e0d84c049..80f49e4eb8e62 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -743,7 +743,7 @@ impl TableProvider for ListingTable { filters: &[Expr], limit: Option, ) -> Result> { - // TODO remove downcast_ref from here? + // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here? let session_state = state.as_any().downcast_ref::().unwrap(); let (mut partitioned_file_lists, statistics) = self .list_files_for_scan(session_state, filters, limit) @@ -883,7 +883,7 @@ impl TableProvider for ListingTable { // Get the object store for the table path. let store = state.runtime_env().object_store(table_path)?; - // TODO remove downcast_ref from here? + // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here? let session_state = state.as_any().downcast_ref::().unwrap(); let file_list_stream = pruned_partition_list( session_state, diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index ce52dd98166e2..591a19aab49b4 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -52,7 +52,7 @@ impl TableProviderFactory for ListingTableFactory { state: &dyn Session, cmd: &CreateExternalTable, ) -> Result> { - // TODO remove downcast_ref from here. Should file format factory be an extension to session state? + // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here. Should file format factory be an extension to session state? 
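The TODOs added above all point at the same pattern: a `&dyn Session` is downcast back to the concrete `SessionState` until issue 11600 removes the need for it. A hedged sketch of that pattern as a fallible helper (the helper name and error text are illustrative, not part of the patch):

```rust
use datafusion::execution::session_state::SessionState;
use datafusion_catalog::Session;
use datafusion_common::{DataFusionError, Result};

/// Recover the concrete `SessionState` behind a `Session` trait object,
/// returning an error instead of panicking when the downcast fails.
fn as_session_state(state: &dyn Session) -> Result<&SessionState> {
    state
        .as_any()
        .downcast_ref::<SessionState>()
        .ok_or_else(|| {
            DataFusionError::Internal("Session is not a SessionState".to_string())
        })
}
```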
let session_state = state.as_any().downcast_ref::().unwrap(); let file_format = session_state .get_file_format_factory(cmd.file_type.as_str()) From de2da34cc51265e6a34864db3b1ef0764b89d631 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Fri, 2 Aug 2024 20:33:42 -0400 Subject: [PATCH 204/357] Add docs and rename param (#11778) --- datafusion/expr/src/signature.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index eadd7ac2f83fb..b1cec3bad774b 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -266,9 +266,10 @@ impl Signature { } } - pub fn numeric(num: usize, volatility: Volatility) -> Self { + /// A specified number of numeric arguments + pub fn numeric(arg_count: usize, volatility: Volatility) -> Self { Self { - type_signature: TypeSignature::Numeric(num), + type_signature: TypeSignature::Numeric(arg_count), volatility, } } From 6c4c2461258fb35dab5f2dd93e5465be1d48a123 Mon Sep 17 00:00:00 2001 From: Shehab Amin <11789402+shehabgamin@users.noreply.github.com> Date: Sat, 3 Aug 2024 10:29:27 +0000 Subject: [PATCH 205/357] Doc: Add Sail to known users list (#11791) * doc: Add Sail to known users list * Update docs/source/user-guide/introduction.md Co-authored-by: Heran Lin --------- Co-authored-by: Heran Lin --- docs/source/user-guide/introduction.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user-guide/introduction.md b/docs/source/user-guide/introduction.md index 3a39419236d84..8f8983061eb69 100644 --- a/docs/source/user-guide/introduction.md +++ b/docs/source/user-guide/introduction.md @@ -114,6 +114,7 @@ Here are some active projects using DataFusion: - [qv](https://github.com/timvw/qv) Quickly view your data - [Restate](https://github.com/restatedev) Easily build resilient applications using distributed durable async/await - [ROAPI](https://github.com/roapi/roapi) +- [Sail](https://github.com/lakehq/sail) Unifying stream, batch, and AI workloads with Apache Spark compatibility - [Seafowl](https://github.com/splitgraph/seafowl) CDN-friendly analytical database - [Spice.ai](https://github.com/spiceai/spiceai) Unified SQL query interface & materialization engine - [Synnada](https://synnada.ai/) Streaming-first framework for data products From 81668f3b2d6448824e87d5224b43f5e465e5af10 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sat, 3 Aug 2024 18:32:05 +0800 Subject: [PATCH 206/357] Support planning `Map` literal (#11780) * support map literal syntax * address comment and enhance tests * align error messages * add issue references * using iterators collecting to build the exprs * cargo fmt --- datafusion/sql/src/expr/mod.rs | 31 ++++- datafusion/sqllogictest/test_files/map.slt | 149 +++++++++++++++++++++ 2 files changed, 177 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 71ff7c03bea2f..b80ffb6aed3ff 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -21,8 +21,8 @@ use datafusion_expr::planner::PlannerResult; use datafusion_expr::planner::RawDictionaryExpr; use datafusion_expr::planner::RawFieldAccessExpr; use sqlparser::ast::{ - CastKind, DictionaryField, Expr as SQLExpr, StructField, Subscript, TrimWhereField, - Value, + CastKind, DictionaryField, Expr as SQLExpr, MapEntry, StructField, Subscript, + TrimWhereField, Value, }; use datafusion_common::{ @@ -628,6 +628,9 @@ impl<'a, S: 
ContextProvider> SqlToRel<'a, S> { SQLExpr::Dictionary(fields) => { self.try_plan_dictionary_literal(fields, schema, planner_context) } + SQLExpr::Map(map) => { + self.try_plan_map_literal(map.entries, schema, planner_context) + } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } @@ -711,7 +714,29 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(expr) => raw_expr = expr, } } - not_impl_err!("Unsupported dictionary literal: {raw_expr:?}") + not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}") + } + + fn try_plan_map_literal( + &self, + entries: Vec, + schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result { + let mut exprs: Vec<_> = entries + .into_iter() + .flat_map(|entry| vec![entry.key, entry.value].into_iter()) + .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context)) + .collect::>>()?; + for planner in self.context_provider.get_expr_planners() { + match planner.plan_make_map(exprs)? { + PlannerResult::Planned(expr) => { + return Ok(expr); + } + PlannerResult::Original(expr) => exprs = expr, + } + } + not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}") } // Handles a call to struct(...) where the arguments are named. For example diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index e530e14df66ea..11998eea90440 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -310,3 +310,152 @@ VALUES (MAP(['a'], [1])), (MAP(['b'], [2])), (MAP(['c', 'a'], [3, 1])) {a: 1} {b: 2} {c: 3, a: 1} + +query ? +SELECT MAP {'a':1, 'b':2, 'c':3}; +---- +{a: 1, b: 2, c: 3} + +query ? +SELECT MAP {'a':1, 'b':2, 'c':3 } FROM t; +---- +{a: 1, b: 2, c: 3} +{a: 1, b: 2, c: 3} +{a: 1, b: 2, c: 3} + +query I +SELECT MAP {'a':1, 'b':2, 'c':3}['a']; +---- +1 + +query I +SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; +---- +1 +1 +1 + +# TODO(https://github.com/sqlparser-rs/sqlparser-rs/pull/1361): support parsing an empty map. Enable this after upgrading sqlparser-rs. +# query ? +# SELECT MAP {}; +# ---- +# {} + +# values contain null +query ? +SELECT MAP {'a': 1, 'b': null}; +---- +{a: 1, b: } + +# keys contain null +query error DataFusion error: Execution error: map key cannot be null +SELECT MAP {'a': 1, null: 2} + +# array as key +query ? +SELECT MAP {[1,2,3]:1, [2,4]:2}; +---- + {[1, 2, 3]: 1, [2, 4]: 2} + +# array with different type as key +# expect to fail due to type coercion error +query error +SELECT MAP {[1,2,3]:1, ['a', 'b']:2}; + +# array as value +query ? +SELECT MAP {'a':[1,2,3], 'b':[2,4]}; +---- +{a: [1, 2, 3], b: [2, 4]} + +# array with different type as value +# expect to fail due to type coercion error +query error +SELECT MAP {'a':[1,2,3], 'b':['a', 'b']}; + +# struct as key +query ? +SELECT MAP {{'a':1, 'b':2}:1, {'a':3, 'b':4}:2}; +---- +{{a: 1, b: 2}: 1, {a: 3, b: 4}: 2} + +# struct with different fields as key +# expect to fail due to type coercion error +query error +SELECT MAP {{'a':1, 'b':2}:1, {'c':3, 'd':4}:2}; + +# struct as value +query ? +SELECT MAP {'a':{'b':1, 'c':2}, 'b':{'b':3, 'c':4}}; +---- +{a: {b: 1, c: 2}, b: {b: 3, c: 4}} + +# struct with different fields as value +# expect to fail due to type coercion error +query error +SELECT MAP {'a':{'b':1, 'c':2}, 'b':{'c':3, 'd':4}}; + +# map as key +query ? +SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }; +---- +{{1: a, 2: b}: 1, {1: c, 2: d}: 2} + +# map with different keys as key +query ? 
+SELECT MAP { MAP {1:'a', 2:'b', 3:'c'}:1, MAP {2:'c', 4:'d'}:2 }; +---- + {{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} + +# map as value +query ? +SELECT MAP {1: MAP {1:'a', 2:'b'}, 2: MAP {1:'c', 2:'d'} }; +---- +{1: {1: a, 2: b}, 2: {1: c, 2: d}} + +# map with different keys as value +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }; +---- +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} + +# complex map for each row +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} } from t; +---- +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} + +# access map with non-existent key +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }['c']; +---- +NULL + +# access map with null key +query error +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }[NULL]; + +query ? +SELECT MAP { 'a': 1, 2: 3 }; +---- +{a: 1, 2: 3} + +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key +# query ? +# SELECT MAP { 1: 'a', 2: 'b', 3: 'c' }[1]; +# ---- +# a + +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key +# query ? +# SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }[MAP {1:'a', 2:'b'}]; +# ---- +# 1 + +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key +# query ? +# SELECT MAKE_MAP(1, null, 2, 33, 3, null)[2]; +# ---- +# 33 From 9e90e17a61444b77d7d0de416a33857d1a5105b5 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Sat, 3 Aug 2024 03:57:52 -0700 Subject: [PATCH 207/357] fix: Add additional required expression for natural join (#11713) * adding test file * replace expand_wildcard * refine test --------- Co-authored-by: jonahgao --- datafusion/expr/src/logical_plan/builder.rs | 7 ++- datafusion/sqllogictest/test_files/join.slt | 49 ++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 98e262f0b187b..736310c7ac0f8 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1531,7 +1531,12 @@ pub fn wrap_projection_for_join_if_necessary( let need_project = join_keys.iter().any(|key| !matches!(key, Expr::Column(_))); let plan = if need_project { - let mut projection = expand_wildcard(input_schema, &input, None)?; + // Include all columns from the input and extend them with the join keys + let mut projection = input_schema + .columns() + .into_iter() + .map(Expr::Column) + .collect::>(); let join_key_items = alias_join_keys .iter() .flat_map(|expr| expr.try_as_col().is_none().then_some(expr)) diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 29f1c4384daff..21fea4ad10255 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1130,4 +1130,51 @@ SELECT * FROM (SELECT * FROM t1 CROSS JOIN t2) WHERE t1.a + t2.a IS NULL; ---- -NULL NULL \ No newline at end of file +NULL NULL + +statement ok +CREATE TABLE t5(v0 BIGINT, v1 STRING, v2 BIGINT, v3 STRING, v4 BOOLEAN); + +statement ok +CREATE TABLE t1(v0 BIGINT, v1 STRING); + +statement ok +CREATE TABLE t0(v0 BIGINT, v1 DOUBLE); + +query TT +explain SELECT * +FROM t1 +NATURAL JOIN t5 +INNER JOIN t0 ON (t0.v1 + t5.v0) > 0 
+WHERE t0.v1 = t1.v0; +---- +logical_plan +01)Projection: t1.v0, t1.v1, t5.v2, t5.v3, t5.v4, t0.v0, t0.v1 +02)--Inner Join: CAST(t1.v0 AS Float64) = t0.v1 Filter: t0.v1 + CAST(t5.v0 AS Float64) > Float64(0) +03)----Projection: t1.v0, t1.v1, t5.v0, t5.v2, t5.v3, t5.v4 +04)------Inner Join: Using t1.v0 = t5.v0, t1.v1 = t5.v1 +05)--------TableScan: t1 projection=[v0, v1] +06)--------TableScan: t5 projection=[v0, v1, v2, v3, v4] +07)----TableScan: t0 projection=[v0, v1] +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(CAST(t1.v0 AS Float64)@6, v1@1)], filter=v1@1 + CAST(v0@0 AS Float64) > 0, projection=[v0@0, v1@1, v2@3, v3@4, v4@5, v0@7, v1@8] +03)----CoalescePartitionsExec +04)------ProjectionExec: expr=[v0@0 as v0, v1@1 as v1, v0@2 as v0, v2@3 as v2, v3@4 as v3, v4@5 as v4, CAST(v0@0 AS Float64) as CAST(t1.v0 AS Float64)] +05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(v0@0, v0@0), (v1@1, v1@1)], projection=[v0@0, v1@1, v0@2, v2@4, v3@5, v4@6] +08)--------------MemoryExec: partitions=1, partition_sizes=[0] +09)--------------MemoryExec: partitions=1, partition_sizes=[0] +10)----MemoryExec: partitions=1, partition_sizes=[0] + + + +statement ok +drop table t5; + +statement ok +drop table t1; + +statement ok +drop table t0; From f4e519f9df9ab5972638d3f2743da01887a52668 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Sat, 3 Aug 2024 07:18:10 -0400 Subject: [PATCH 208/357] Move min and max to user defined aggregate function, remove `AggregateFunction` / `AggregateFunctionDefinition::BuiltIn` (#11013) * Moving min and max to new API and removing from protobuf * Using input_type rather than data_type * Adding type coercion * Fixed doctests * Implementing feedback from code review * Implementing feedback from code review * Fixed wrong name * Fixing name --- .../examples/dataframe_subquery.rs | 1 + datafusion/core/src/dataframe/mod.rs | 8 +- .../src/datasource/file_format/parquet.rs | 2 +- datafusion/core/src/datasource/statistics.rs | 2 +- datafusion/core/src/execution/context/mod.rs | 1 + datafusion/core/src/lib.rs | 1 + .../aggregate_statistics.rs | 15 +- datafusion/core/src/physical_planner.rs | 28 +- datafusion/core/tests/dataframe/mod.rs | 8 +- .../core/tests/fuzz_cases/window_fuzz.rs | 21 +- datafusion/expr/src/aggregate_function.rs | 156 ---- datafusion/expr/src/expr.rs | 59 +- datafusion/expr/src/expr_fn.rs | 30 +- datafusion/expr/src/expr_rewriter/order_by.rs | 6 +- datafusion/expr/src/expr_schema.rs | 8 - datafusion/expr/src/lib.rs | 2 - datafusion/expr/src/test/function_stub.rs | 174 ++++ datafusion/expr/src/tree_node.rs | 10 - .../expr/src/type_coercion/aggregates.rs | 66 +- datafusion/expr/src/utils.rs | 19 +- datafusion/functions-aggregate/Cargo.toml | 3 + datafusion/functions-aggregate/src/lib.rs | 12 +- .../src}/min_max.rs | 856 ++++++++++++------ datafusion/functions-nested/src/planner.rs | 7 +- .../src/analyzer/count_wildcard_rule.rs | 6 +- .../optimizer/src/analyzer/type_coercion.rs | 58 +- datafusion/optimizer/src/decorrelate.rs | 3 - .../optimizer/src/optimize_projections/mod.rs | 12 +- datafusion/optimizer/src/push_down_limit.rs | 4 +- .../optimizer/src/scalar_subquery_to_join.rs | 4 +- .../simplify_expressions/simplify_exprs.rs | 6 +- .../src/single_distinct_to_groupby.rs | 115 +-- .../physical-expr/src/aggregate/build_in.rs | 208 ----- 
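For users, the practical effect of this migration is an import change: `min` and `max` now come from `datafusion::functions_aggregate::expr_fn` rather than `datafusion_expr`, as the doc updates below show. A minimal sketch of the post-migration DataFrame usage, assuming a `tokio` runtime and an illustrative CSV table with columns `a` and `b`:

```rust
use datafusion::error::Result;
use datafusion::functions_aggregate::expr_fn::{max, min};
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Illustrative path; any registered table with columns `a` and `b` works.
    ctx.register_csv("t", "tests/data/example.csv", CsvReadOptions::new())
        .await?;

    let df = ctx
        .table("t")
        .await?
        .aggregate(vec![col("a")], vec![min(col("b")), max(col("b"))])?;
    df.show().await?;
    Ok(())
}
```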
.../src/aggregate/groups_accumulator/mod.rs | 4 - datafusion/physical-expr/src/aggregate/mod.rs | 3 - .../physical-expr/src/expressions/mod.rs | 5 - .../physical-plan/src/aggregates/mod.rs | 3 - datafusion/physical-plan/src/windows/mod.rs | 18 - datafusion/proto/gen/src/main.rs | 7 +- datafusion/proto/proto/datafusion.proto | 50 - datafusion/proto/src/generated/pbjson.rs | 293 ------ datafusion/proto/src/generated/prost.rs | 90 +- .../proto/src/logical_plan/from_proto.rs | 54 +- datafusion/proto/src/logical_plan/to_proto.rs | 44 +- .../proto/src/physical_plan/from_proto.rs | 9 - datafusion/proto/src/physical_plan/mod.rs | 24 +- .../proto/src/physical_plan/to_proto.rs | 75 +- .../tests/cases/roundtrip_logical_plan.rs | 17 +- .../tests/cases/roundtrip_physical_plan.rs | 19 +- datafusion/sql/src/expr/function.rs | 68 +- datafusion/sql/tests/cases/plan_to_sql.rs | 6 +- datafusion/sql/tests/sql_integration.rs | 5 +- .../substrait/src/logical_plan/consumer.rs | 10 +- .../substrait/src/logical_plan/producer.rs | 32 - docs/source/user-guide/dataframe.md | 1 + docs/source/user-guide/example-usage.md | 2 + 56 files changed, 937 insertions(+), 1813 deletions(-) delete mode 100644 datafusion/expr/src/aggregate_function.rs rename datafusion/{physical-expr/src/aggregate => functions-aggregate/src}/min_max.rs (60%) delete mode 100644 datafusion/physical-expr/src/aggregate/build_in.rs diff --git a/datafusion-examples/examples/dataframe_subquery.rs b/datafusion-examples/examples/dataframe_subquery.rs index e798751b33532..3e3d0c1b5a84b 100644 --- a/datafusion-examples/examples/dataframe_subquery.rs +++ b/datafusion-examples/examples/dataframe_subquery.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use datafusion::error::Result; use datafusion::functions_aggregate::average::avg; +use datafusion::functions_aggregate::min_max::max; use datafusion::prelude::*; use datafusion::test_util::arrow_test_data; use datafusion_common::ScalarValue; diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 8feccfb43d6b4..cacfa4c6f2aa5 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -53,9 +53,11 @@ use datafusion_common::{ }; use datafusion_expr::{case, is_null, lit}; use datafusion_expr::{ - max, min, utils::COUNT_STAR_EXPANSION, TableProviderFilterPushDown, UNNAMED_TABLE, + utils::COUNT_STAR_EXPANSION, TableProviderFilterPushDown, UNNAMED_TABLE, +}; +use datafusion_functions_aggregate::expr_fn::{ + avg, count, max, median, min, stddev, sum, }; -use datafusion_functions_aggregate::expr_fn::{avg, count, median, stddev, sum}; use async_trait::async_trait; use datafusion_catalog::Session; @@ -144,6 +146,7 @@ impl Default for DataFrameWriteOptions { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; +/// # use datafusion::functions_aggregate::expr_fn::min; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); @@ -407,6 +410,7 @@ impl DataFrame { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; + /// # use datafusion::functions_aggregate::expr_fn::min; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 829b69c297ee5..f233f3842c8c6 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -50,7 +50,7 @@ use 
datafusion_common::{ use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::expressions::{MaxAccumulator, MinAccumulator}; +use datafusion_functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; use datafusion_physical_plan::metrics::MetricsSet; diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index a243a1c3558fb..8c789e461b08e 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -18,7 +18,7 @@ use super::listing::PartitionedFile; use crate::arrow::datatypes::{Schema, SchemaRef}; use crate::error::Result; -use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; +use crate::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; use arrow_schema::DataType; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 9b889c37ab522..24704bc794c28 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -144,6 +144,7 @@ where /// /// ``` /// use datafusion::prelude::*; +/// # use datafusion::functions_aggregate::expr_fn::min; /// # use datafusion::{error::Result, assert_batches_eq}; /// # #[tokio::main] /// # async fn main() -> Result<()> { diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index cf5a184e34160..3bb0636652c08 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -52,6 +52,7 @@ //! ```rust //! # use datafusion::prelude::*; //! # use datafusion::error::Result; +//! # use datafusion::functions_aggregate::expr_fn::min; //! # use datafusion::arrow::record_batch::RecordBatch; //! //! 
# #[tokio::main] diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index a8332d1d55e46..a0f6f6a65b1f7 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -272,39 +272,28 @@ fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { return true; } } - false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 fn is_min(agg_expr: &dyn AggregateExpr) -> bool { - if agg_expr.as_any().is::() { - return true; - } - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "min" { + if agg_expr.fun().name().to_lowercase() == "min" { return true; } } - false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 fn is_max(agg_expr: &dyn AggregateExpr) -> bool { - if agg_expr.as_any().is::() { - return true; - } - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "max" { + if agg_expr.fun().name().to_lowercase() == "max" { return true; } } - false } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 329d343f13fc1..03e20b886e2c4 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -59,8 +59,8 @@ use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::values::ValuesExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - aggregates, displayable, udaf, windows, AggregateExpr, ExecutionPlan, - ExecutionPlanProperties, InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, + displayable, udaf, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties, + InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, }; use arrow::compute::SortOptions; @@ -1812,7 +1812,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, name: impl Into, logical_input_schema: &DFSchema, - physical_input_schema: &Schema, + _physical_input_schema: &Schema, execution_props: &ExecutionProps, ) -> Result { match e { @@ -1840,28 +1840,6 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_by) = match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => { - let physical_sort_exprs = match order_by { - Some(exprs) => Some(create_physical_sort_exprs( - exprs, - logical_input_schema, - execution_props, - )?), - None => None, - }; - let ordering_reqs: Vec = - physical_sort_exprs.clone().unwrap_or(vec![]); - let agg_expr = aggregates::create_aggregate_expr( - fun, - *distinct, - &physical_args, - &ordering_reqs, - physical_input_schema, - name, - ignore_nulls, - )?; - (agg_expr, filter, physical_sort_exprs) - } AggregateFunctionDefinition::UDF(fun) => { let sort_exprs = order_by.clone().unwrap_or(vec![]); let physical_sort_exprs = match order_by { diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index d83a47ceb0699..86cacbaa06d87 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -54,11 +54,11 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{GroupingSet, Sort}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ - cast, col, exists, expr, in_subquery, lit, 
max, out_ref_col, placeholder, - scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, - WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + cast, col, exists, expr, in_subquery, lit, out_ref_col, placeholder, scalar_subquery, + when, wildcard, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, }; -use datafusion_functions_aggregate::expr_fn::{array_agg, avg, count, sum}; +use datafusion_functions_aggregate::expr_fn::{array_agg, avg, count, max, sum}; #[tokio::test] async fn test_count_wildcard_on_sort() -> Result<()> { diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index c97621ec4d019..813862c4cc2f5 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -32,13 +32,13 @@ use datafusion::physical_plan::{collect, InputOrderMode}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::{Result, ScalarValue}; use datafusion_common_runtime::SpawnedTask; -use datafusion_expr::type_coercion::aggregates::coerce_types; use datafusion_expr::type_coercion::functions::data_types_with_aggregate_udf; use datafusion_expr::{ - AggregateFunction, BuiltInWindowFunction, WindowFrame, WindowFrameBound, - WindowFrameUnits, WindowFunctionDefinition, + BuiltInWindowFunction, WindowFrame, WindowFrameBound, WindowFrameUnits, + WindowFunctionDefinition, }; use datafusion_functions_aggregate::count::count_udaf; +use datafusion_functions_aggregate::min_max::{max_udaf, min_udaf}; use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_physical_expr::expressions::{cast, col, lit}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; @@ -361,14 +361,14 @@ fn get_random_function( window_fn_map.insert( "min", ( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), + WindowFunctionDefinition::AggregateUDF(min_udaf()), vec![arg.clone()], ), ); window_fn_map.insert( "max", ( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![arg.clone()], ), ); @@ -465,16 +465,7 @@ fn get_random_function( let fn_name = window_fn_map.keys().collect::>()[rand_fn_idx]; let (window_fn, args) = window_fn_map.values().collect::>()[rand_fn_idx]; let mut args = args.clone(); - if let WindowFunctionDefinition::AggregateFunction(f) = window_fn { - if !args.is_empty() { - // Do type coercion first argument - let a = args[0].clone(); - let dt = a.data_type(schema.as_ref()).unwrap(); - let sig = f.signature(); - let coerced = coerce_types(f, &[dt], &sig).unwrap(); - args[0] = cast(a, schema, coerced[0].clone()).unwrap(); - } - } else if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn { + if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn { if !args.is_empty() { // Do type coercion first argument let a = args[0].clone(); diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs deleted file mode 100644 index 4037e3c5db9b9..0000000000000 --- a/datafusion/expr/src/aggregate_function.rs +++ /dev/null @@ -1,156 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Aggregate function module contains all built-in aggregate functions definitions - -use std::{fmt, str::FromStr}; - -use crate::utils; -use crate::{type_coercion::aggregates::*, Signature, Volatility}; - -use arrow::datatypes::DataType; -use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; - -use strum_macros::EnumIter; - -/// Enum of all built-in aggregate functions -// Contributor's guide for adding new aggregate functions -// https://datafusion.apache.org/contributor-guide/index.html#how-to-add-a-new-aggregate-function -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, EnumIter)] -pub enum AggregateFunction { - /// Minimum - Min, - /// Maximum - Max, -} - -impl AggregateFunction { - pub fn name(&self) -> &str { - use AggregateFunction::*; - match self { - Min => "MIN", - Max => "MAX", - } - } -} - -impl fmt::Display for AggregateFunction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.name()) - } -} - -impl FromStr for AggregateFunction { - type Err = DataFusionError; - fn from_str(name: &str) -> Result { - Ok(match name { - // general - "max" => AggregateFunction::Max, - "min" => AggregateFunction::Min, - _ => { - return plan_err!("There is no built-in function named {name}"); - } - }) - } -} - -impl AggregateFunction { - /// Returns the datatype of the aggregate function given its argument types - /// - /// This is used to get the returned data type for aggregate expr. - pub fn return_type( - &self, - input_expr_types: &[DataType], - _input_expr_nullable: &[bool], - ) -> Result { - // Note that this function *must* return the same type that the respective physical expression returns - // or the execution panics. - - let coerced_data_types = coerce_types(self, input_expr_types, &self.signature()) - // original errors are all related to wrong function signature - // aggregate them for better error message - .map_err(|_| { - plan_datafusion_err!( - "{}", - utils::generate_signature_error_msg( - &format!("{self}"), - self.signature(), - input_expr_types, - ) - ) - })?; - - match self { - AggregateFunction::Max | AggregateFunction::Min => { - // For min and max agg function, the returned type is same as input type. - // The coerced_data_types is same with input_types. - Ok(coerced_data_types[0].clone()) - } - } - } - - /// Returns if the return type of the aggregate function is nullable given its argument - /// nullability - pub fn nullable(&self) -> Result { - match self { - AggregateFunction::Max | AggregateFunction::Min => Ok(true), - } - } -} - -impl AggregateFunction { - /// the signatures supported by the function `fun`. - pub fn signature(&self) -> Signature { - // note: the physical expression must accept the type returned by this function or the execution panics. 
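With this enum gone, window expressions over `min`/`max` are built from the UDAFs directly, which is the pattern the fuzz test above switches to. A minimal sketch outside the test, assuming illustrative column names (`amount`, `ts`):

```rust
use datafusion_common::Result;
use datafusion_expr::expr::WindowFunction;
use datafusion_expr::{col, Expr, ExprFunctionExt, WindowFunctionDefinition};
use datafusion_functions_aggregate::min_max::max_udaf;

/// A running `max(amount)` window expression ordered by `ts`.
fn running_max() -> Result<Expr> {
    Expr::WindowFunction(WindowFunction::new(
        WindowFunctionDefinition::AggregateUDF(max_udaf()),
        vec![col("amount")],
    ))
    .order_by(vec![col("ts").sort(true, true)])
    .build()
}
```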
- match self { - AggregateFunction::Min | AggregateFunction::Max => { - let valid = STRINGS - .iter() - .chain(NUMERICS.iter()) - .chain(TIMESTAMPS.iter()) - .chain(DATES.iter()) - .chain(TIMES.iter()) - .chain(BINARYS.iter()) - .cloned() - .collect::>(); - Signature::uniform(1, valid, Volatility::Immutable) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use strum::IntoEnumIterator; - - #[test] - // Test for AggregateFunction's Display and from_str() implementations. - // For each variant in AggregateFunction, it converts the variant to a string - // and then back to a variant. The test asserts that the original variant and - // the reconstructed variant are the same. This assertion is also necessary for - // function suggestion. See https://github.com/apache/datafusion/issues/8082 - fn test_display_and_from_str() { - for func_original in AggregateFunction::iter() { - let func_name = func_original.to_string(); - let func_from_str = - AggregateFunction::from_str(func_name.to_lowercase().as_str()).unwrap(); - assert_eq!(func_from_str, func_original); - } - } -} diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 68d5504eea48b..708843494814b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -28,8 +28,8 @@ use crate::expr_fn::binary_expr; use crate::logical_plan::Subquery; use crate::utils::expr_to_columns; use crate::{ - aggregate_function, built_in_window_function, udaf, BuiltInWindowFunction, - ExprSchemable, Operator, Signature, WindowFrame, WindowUDF, + built_in_window_function, udaf, BuiltInWindowFunction, ExprSchemable, Operator, + Signature, WindowFrame, WindowUDF, }; use crate::{window_frame, Volatility}; @@ -630,7 +630,6 @@ impl Sort { #[derive(Debug, Clone, PartialEq, Eq, Hash)] /// Defines which implementation of an aggregate function DataFusion should call. pub enum AggregateFunctionDefinition { - BuiltIn(aggregate_function::AggregateFunction), /// Resolved to a user defined aggregate function UDF(Arc), } @@ -639,7 +638,6 @@ impl AggregateFunctionDefinition { /// Function's name for display pub fn name(&self) -> &str { match self { - AggregateFunctionDefinition::BuiltIn(fun) => fun.name(), AggregateFunctionDefinition::UDF(udf) => udf.name(), } } @@ -666,24 +664,6 @@ pub struct AggregateFunction { } impl AggregateFunction { - pub fn new( - fun: aggregate_function::AggregateFunction, - args: Vec, - distinct: bool, - filter: Option>, - order_by: Option>, - null_treatment: Option, - ) -> Self { - Self { - func_def: AggregateFunctionDefinition::BuiltIn(fun), - args, - distinct, - filter, - order_by, - null_treatment, - } - } - /// Create a new AggregateFunction expression with a user-defined function (UDF) pub fn new_udf( udf: Arc, @@ -709,7 +689,6 @@ impl AggregateFunction { /// Defines which implementation of an aggregate function DataFusion should call. 
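With the `BuiltIn` variant removed, every `Expr::AggregateFunction` wraps a UDAF and `AggregateFunction::new_udf` is the only constructor left. A minimal sketch, equivalent to `max(col("x"))` from `expr_fn`:

```rust
use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::{col, Expr};
use datafusion_functions_aggregate::min_max::max_udaf;

fn max_of_x() -> Expr {
    Expr::AggregateFunction(AggregateFunction::new_udf(
        max_udaf(),
        vec![col("x")],
        false, // distinct
        None,  // filter
        None,  // order by
        None,  // null treatment
    ))
}
```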
pub enum WindowFunctionDefinition { /// A built in aggregate function that leverages an aggregate function - AggregateFunction(aggregate_function::AggregateFunction), /// A a built-in window function BuiltInWindowFunction(built_in_window_function::BuiltInWindowFunction), /// A user defined aggregate function @@ -723,12 +702,9 @@ impl WindowFunctionDefinition { pub fn return_type( &self, input_expr_types: &[DataType], - input_expr_nullable: &[bool], + _input_expr_nullable: &[bool], ) -> Result { match self { - WindowFunctionDefinition::AggregateFunction(fun) => { - fun.return_type(input_expr_types, input_expr_nullable) - } WindowFunctionDefinition::BuiltInWindowFunction(fun) => { fun.return_type(input_expr_types) } @@ -742,7 +718,6 @@ impl WindowFunctionDefinition { /// the signatures supported by the function `fun`. pub fn signature(&self) -> Signature { match self { - WindowFunctionDefinition::AggregateFunction(fun) => fun.signature(), WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.signature(), WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(), WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(), @@ -754,7 +729,6 @@ impl WindowFunctionDefinition { match self { WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.name(), WindowFunctionDefinition::WindowUDF(fun) => fun.name(), - WindowFunctionDefinition::AggregateFunction(fun) => fun.name(), WindowFunctionDefinition::AggregateUDF(fun) => fun.name(), } } @@ -763,9 +737,6 @@ impl WindowFunctionDefinition { impl fmt::Display for WindowFunctionDefinition { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - WindowFunctionDefinition::AggregateFunction(fun) => { - std::fmt::Display::fmt(fun, f) - } WindowFunctionDefinition::BuiltInWindowFunction(fun) => { std::fmt::Display::fmt(fun, f) } @@ -775,12 +746,6 @@ impl fmt::Display for WindowFunctionDefinition { } } -impl From for WindowFunctionDefinition { - fn from(value: aggregate_function::AggregateFunction) -> Self { - Self::AggregateFunction(value) - } -} - impl From for WindowFunctionDefinition { fn from(value: BuiltInWindowFunction) -> Self { Self::BuiltInWindowFunction(value) @@ -866,10 +831,6 @@ pub fn find_df_window_func(name: &str) -> Option { Some(WindowFunctionDefinition::BuiltInWindowFunction( built_in_function, )) - } else if let Ok(aggregate) = - aggregate_function::AggregateFunction::from_str(name.as_str()) - { - Some(WindowFunctionDefinition::AggregateFunction(aggregate)) } else { None } @@ -2589,8 +2550,6 @@ mod test { "first_value", "last_value", "nth_value", - "min", - "max", ]; for name in names { let fun = find_df_window_func(name).unwrap(); @@ -2607,18 +2566,6 @@ mod test { #[test] fn test_find_df_window_function() { - assert_eq!( - find_df_window_func("max"), - Some(WindowFunctionDefinition::AggregateFunction( - aggregate_function::AggregateFunction::Max - )) - ); - assert_eq!( - find_df_window_func("min"), - Some(WindowFunctionDefinition::AggregateFunction( - aggregate_function::AggregateFunction::Min - )) - ); assert_eq!( find_df_window_func("cume_dist"), Some(WindowFunctionDefinition::BuiltInWindowFunction( diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 1f51cded22399..e9c5485656c8c 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -26,9 +26,9 @@ use crate::function::{ StateFieldsArgs, }; use crate::{ - aggregate_function, conditional_expressions::CaseBuilder, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, 
ScalarFunctionImplementation, ScalarUDF, - Signature, Volatility, + conditional_expressions::CaseBuilder, logical_plan::Subquery, AggregateUDF, Expr, + LogicalPlan, Operator, ScalarFunctionImplementation, ScalarUDF, Signature, + Volatility, }; use crate::{ AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowFrame, WindowUDF, WindowUDFImpl, @@ -150,30 +150,6 @@ pub fn not(expr: Expr) -> Expr { expr.not() } -/// Create an expression to represent the min() aggregate function -pub fn min(expr: Expr) -> Expr { - Expr::AggregateFunction(AggregateFunction::new( - aggregate_function::AggregateFunction::Min, - vec![expr], - false, - None, - None, - None, - )) -} - -/// Create an expression to represent the max() aggregate function -pub fn max(expr: Expr) -> Expr { - Expr::AggregateFunction(AggregateFunction::new( - aggregate_function::AggregateFunction::Max, - vec![expr], - false, - None, - None, - None, - )) -} - /// Return a new expression with bitwise AND pub fn bitwise_and(left: Expr, right: Expr) -> Expr { Expr::BinaryExpr(BinaryExpr::new( diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index 4b56ca3d1c2e0..2efdcae1a790c 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -156,11 +156,13 @@ mod test { use arrow::datatypes::{DataType, Field, Schema}; use crate::{ - cast, col, lit, logical_plan::builder::LogicalTableSource, min, - test::function_stub::avg, try_cast, LogicalPlanBuilder, + cast, col, lit, logical_plan::builder::LogicalTableSource, try_cast, + LogicalPlanBuilder, }; use super::*; + use crate::test::function_stub::avg; + use crate::test::function_stub::min; #[test] fn rewrite_sort_cols_by_agg() { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 5e0571f712ee5..6344b892adb7e 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -198,14 +198,7 @@ impl ExprSchemable for Expr { .iter() .map(|e| e.get_type(schema)) .collect::>>()?; - let nullability = args - .iter() - .map(|e| e.nullable(schema)) - .collect::>>()?; match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => { - fun.return_type(&data_types, &nullability) - } AggregateFunctionDefinition::UDF(fun) => { let new_types = data_types_with_aggregate_udf(&data_types, fun) .map_err(|err| { @@ -338,7 +331,6 @@ impl ExprSchemable for Expr { Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), Expr::AggregateFunction(AggregateFunction { func_def, .. 
}) => { match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => fun.nullable(), // TODO: UDF should be able to customize nullability AggregateFunctionDefinition::UDF(udf) if udf.name() == "count" => { Ok(false) diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 0a5cf4653a228..f5460918fa707 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -39,7 +39,6 @@ mod udaf; mod udf; mod udwf; -pub mod aggregate_function; pub mod conditional_expressions; pub mod execution_props; pub mod expr; @@ -64,7 +63,6 @@ pub mod window_function; pub mod window_state; pub use accumulator::Accumulator; -pub use aggregate_function::AggregateFunction; pub use built_in_window_function::BuiltInWindowFunction; pub use columnar_value::ColumnarValue; pub use expr::{ diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index 3e0760b5c0dec..72b73ccee44fb 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -289,6 +289,180 @@ impl AggregateUDFImpl for Count { } } +create_func!(Min, min_udaf); + +pub fn min(expr: Expr) -> Expr { + Expr::AggregateFunction(AggregateFunction::new_udf( + min_udaf(), + vec![expr], + false, + None, + None, + None, + )) +} + +/// Testing stub implementation of Min aggregate +pub struct Min { + signature: Signature, + aliases: Vec, +} + +impl std::fmt::Debug for Min { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("Min") + .field("name", &self.name()) + .field("signature", &self.signature) + .finish() + } +} + +impl Default for Min { + fn default() -> Self { + Self::new() + } +} + +impl Min { + pub fn new() -> Self { + Self { + aliases: vec!["min".to_string()], + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl AggregateUDFImpl for Min { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "MIN" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int64) + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + not_impl_err!("no impl for stub") + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + not_impl_err!("no impl for stub") + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + not_impl_err!("no impl for stub") + } + + fn reverse_expr(&self) -> ReversedUDAF { + ReversedUDAF::Identical + } + fn is_descending(&self) -> Option { + Some(false) + } +} + +create_func!(Max, max_udaf); + +pub fn max(expr: Expr) -> Expr { + Expr::AggregateFunction(AggregateFunction::new_udf( + max_udaf(), + vec![expr], + false, + None, + None, + None, + )) +} + +/// Testing stub implementation of MAX aggregate +pub struct Max { + signature: Signature, + aliases: Vec, +} + +impl std::fmt::Debug for Max { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("Max") + .field("name", &self.name()) + .field("signature", &self.signature) + .finish() + } +} + +impl Default for Max { + fn default() -> Self { + Self::new() + } +} + +impl Max { + pub fn new() -> Self { + Self { + aliases: vec!["max".to_string()], + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl AggregateUDFImpl for Max { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "MAX" + } + + fn signature(&self) 
-> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int64) + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + not_impl_err!("no impl for stub") + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + not_impl_err!("no impl for stub") + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + not_impl_err!("no impl for stub") + } + + fn reverse_expr(&self) -> ReversedUDAF { + ReversedUDAF::Identical + } + fn is_descending(&self) -> Option { + Some(true) + } +} + /// Testing stub implementation of avg aggregate #[derive(Debug)] pub struct Avg { diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index a97b9f010f792..a8062c0c07ee9 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -318,16 +318,6 @@ impl TreeNode for Expr { )? .map_data( |(new_args, new_filter, new_order_by)| match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => { - Ok(Expr::AggregateFunction(AggregateFunction::new( - fun, - new_args, - distinct, - new_filter, - new_order_by, - null_treatment, - ))) - } AggregateFunctionDefinition::UDF(fun) => { Ok(Expr::AggregateFunction(AggregateFunction::new_udf( fun, diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs index a024401e18d5f..e7e58bf84362e 100644 --- a/datafusion/expr/src/type_coercion/aggregates.rs +++ b/datafusion/expr/src/type_coercion/aggregates.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::ops::Deref; - +use crate::TypeSignature; use arrow::datatypes::{ DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, @@ -24,8 +23,6 @@ use arrow::datatypes::{ use datafusion_common::{internal_err, plan_err, Result}; -use crate::{AggregateFunction, Signature, TypeSignature}; - pub static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8]; pub static SIGNED_INTEGERS: &[DataType] = &[ @@ -84,25 +81,6 @@ pub static TIMES: &[DataType] = &[ DataType::Time64(TimeUnit::Nanosecond), ]; -/// Returns the coerced data type for each `input_types`. -/// Different aggregate function with different input data type will get corresponding coerced data type. -pub fn coerce_types( - agg_fun: &AggregateFunction, - input_types: &[DataType], - signature: &Signature, -) -> Result> { - // Validate input_types matches (at least one of) the func signature. - check_arg_count(agg_fun.name(), input_types, &signature.type_signature)?; - - match agg_fun { - AggregateFunction::Min | AggregateFunction::Max => { - // min and max support the dictionary data type - // unpack the dictionary to get the value - get_min_max_result_type(input_types) - } - } -} - /// Validate the length of `input_types` matches the `signature` for `agg_fun`. /// /// This method DOES NOT validate the argument types - only that (at least one, @@ -163,22 +141,6 @@ pub fn check_arg_count( Ok(()) } -fn get_min_max_result_type(input_types: &[DataType]) -> Result> { - // make sure that the input types only has one element. 
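The `get_min_max_result_type` helper being removed from `type_coercion` here (it is re-homed in `min_max.rs`) simply unpacks dictionary-encoded inputs to their value type. A stand-alone illustration of that coercion, using only the `arrow` crate:

```rust
use arrow::datatypes::DataType;

/// Min/max over a dictionary column produce the dictionary's value type;
/// all other types pass through unchanged.
fn unpack_dictionary(input: &DataType) -> DataType {
    match input {
        DataType::Dictionary(_, value_type) => value_type.as_ref().clone(),
        other => other.clone(),
    }
}

fn main() {
    let dict =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    assert_eq!(unpack_dictionary(&dict), DataType::Utf8);
    assert_eq!(unpack_dictionary(&DataType::Int64), DataType::Int64);
}
```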
- assert_eq!(input_types.len(), 1); - // min and max support the dictionary data type - // unpack the dictionary to get the value - match &input_types[0] { - DataType::Dictionary(_, dict_value_type) => { - // TODO add checker, if the value type is complex data type - Ok(vec![dict_value_type.deref().clone()]) - } - // TODO add checker for datatype which min and max supported - // For example, the `Struct` and `Map` type are not supported in the MIN and MAX function - _ => Ok(input_types.to_vec()), - } -} - /// function return type of a sum pub fn sum_return_type(arg_type: &DataType) -> Result { match arg_type { @@ -348,32 +310,6 @@ pub fn coerce_avg_type(func_name: &str, arg_types: &[DataType]) -> Result Result<()> { diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 2ef1597abfd1d..683a8e170ed4d 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1253,8 +1253,9 @@ mod tests { use super::*; use crate::{ col, cube, expr, expr_vec_fmt, grouping_set, lit, rollup, - test::function_stub::sum_udaf, AggregateFunction, Cast, ExprFunctionExt, - WindowFrame, WindowFunctionDefinition, + test::function_stub::max_udaf, test::function_stub::min_udaf, + test::function_stub::sum_udaf, Cast, ExprFunctionExt, WindowFrame, + WindowFunctionDefinition, }; #[test] @@ -1268,15 +1269,15 @@ mod tests { #[test] fn test_group_window_expr_by_sort_keys_empty_window() -> Result<()> { let max1 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("name")], )); let max2 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("name")], )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), + WindowFunctionDefinition::AggregateUDF(min_udaf()), vec![col("name")], )); let sum4 = Expr::WindowFunction(expr::WindowFunction::new( @@ -1299,18 +1300,18 @@ mod tests { let created_at_desc = Expr::Sort(expr::Sort::new(Box::new(col("created_at")), false, true)); let max1 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("name")], )) .order_by(vec![age_asc.clone(), name_desc.clone()]) .build() .unwrap(); let max2 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("name")], )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), + WindowFunctionDefinition::AggregateUDF(min_udaf()), vec![col("name")], )) .order_by(vec![age_asc.clone(), name_desc.clone()]) @@ -1352,7 +1353,7 @@ mod tests { fn test_find_sort_exprs() -> Result<()> { let exprs = &[ Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("name")], )) .order_by(vec![ diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 26630a0352d58..43ddd37cfb6ff 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -48,3 +48,6 @@ 
datafusion-physical-expr-common = { workspace = true } log = { workspace = true } paste = "1.0.14" sqlparser = { workspace = true } + +[dev-dependencies] +rand = { workspace = true } diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 1711869666440..b54cd181a0cbf 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -65,6 +65,7 @@ pub mod covariance; pub mod first_last; pub mod hyperloglog; pub mod median; +pub mod min_max; pub mod regr; pub mod stddev; pub mod sum; @@ -110,7 +111,8 @@ pub mod expr_fn { pub use super::first_last::last_value; pub use super::grouping::grouping; pub use super::median::median; - pub use super::nth_value::nth_value; + pub use super::min_max::max; + pub use super::min_max::min; pub use super::regr::regr_avgx; pub use super::regr::regr_avgy; pub use super::regr::regr_count; @@ -137,6 +139,8 @@ pub fn all_default_aggregate_functions() -> Vec> { covariance::covar_pop_udaf(), correlation::corr_udaf(), sum::sum_udaf(), + min_max::max_udaf(), + min_max::min_udaf(), median::median_udaf(), count::count_udaf(), regr::regr_slope_udaf(), @@ -192,11 +196,11 @@ mod tests { #[test] fn test_no_duplicate_name() -> Result<()> { let mut names = HashSet::new(); + let migrated_functions = ["array_agg", "count", "max", "min"]; for func in all_default_aggregate_functions() { // TODO: remove this - // These functions are in intermidiate migration state, skip them - let name_lower_case = func.name().to_lowercase(); - if name_lower_case == "count" || name_lower_case == "array_agg" { + // These functions are in intermediate migration state, skip them + if migrated_functions.contains(&func.name().to_lowercase().as_str()) { continue; } assert!( diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs similarity index 60% rename from datafusion/physical-expr/src/aggregate/min_max.rs rename to datafusion/functions-aggregate/src/min_max.rs index f9362db30196f..4d743983411dc 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -2,7 +2,6 @@ // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // @@ -15,103 +14,107 @@ // specific language governing permissions and limitations // under the License. -//! Defines physical expressions that can evaluated at runtime during query execution +//! [`Max`] and [`MaxAccumulator`] accumulator for the `max` function +//! [`Min`] and [`MinAccumulator`] accumulator for the `max` function -use std::any::Any; -use std::sync::Arc; +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. -use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; -use crate::{AggregateExpr, PhysicalExpr}; +use arrow::array::{ + ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, + Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, + IntervalYearMonthArray, LargeBinaryArray, LargeStringArray, StringArray, + StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, + Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, + UInt64Array, UInt8Array, +}; use arrow::compute; use arrow::datatypes::{ - DataType, Date32Type, Date64Type, IntervalUnit, Time32MillisecondType, - Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, - TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, - TimestampSecondType, -}; -use arrow::{ - array::{ - ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, - LargeBinaryArray, LargeStringArray, StringArray, Time32MillisecondArray, - Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, - TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, - datatypes::Field, -}; -use arrow_array::types::{ - Decimal128Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, - UInt16Type, UInt32Type, UInt64Type, UInt8Type, + DataType, Decimal128Type, Decimal256Type, Float32Type, Float64Type, Int16Type, + Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; -use arrow_array::{BinaryViewArray, StringViewArray}; -use datafusion_common::internal_err; -use datafusion_common::ScalarValue; -use datafusion_common::{downcast_value, DataFusionError, Result}; -use datafusion_expr::{Accumulator, GroupsAccumulator}; - -use crate::aggregate::utils::down_cast_any_ref; -use crate::expressions::format_state_name; -use arrow::array::Array; -use arrow::array::Decimal128Array; -use arrow::array::Decimal256Array; -use arrow::datatypes::i256; -use arrow::datatypes::Decimal256Type; +use arrow_schema::IntervalUnit; +use datafusion_common::{downcast_value, internal_err, DataFusionError, Result}; +use datafusion_physical_expr_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; +use std::fmt::Debug; -use super::moving_min_max; +use arrow::datatypes::i256; +use arrow::datatypes::{ + Date32Type, Date64Type, Time32MillisecondType, Time32SecondType, + Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, +}; -// Min/max aggregation can take Dictionary encode input but always produces unpacked -// (aka non Dictionary) output. We need to adjust the output data type to reflect this. 
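// A minimal sketch, assuming it sits beside the tests at the bottom of this new
// module (the helper name `sketch_dictionary_coercion` is illustrative): the
// dictionary-unpacking rule described in the removed comments above is preserved
// by the UDAF through `AggregateUDFImpl::coerce_types`, which delegates to
// `get_min_max_result_type` below.
fn sketch_dictionary_coercion() -> Result<()> {
    let max = Max::new();
    // A Dictionary(Int32 keys, Utf8 values) input is aggregated as plain Utf8.
    let coerced = max.coerce_types(&[DataType::Dictionary(
        Box::new(DataType::Int32),
        Box::new(DataType::Utf8),
    )])?;
    assert_eq!(coerced, vec![DataType::Utf8]);
    Ok(())
}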
-// The reason min/max aggregate produces unpacked output because there is only one -// min/max value per group; there is no needs to keep them Dictionary encode -fn min_max_aggregate_data_type(input_type: DataType) -> DataType { - if let DataType::Dictionary(_, value_type) = input_type { - *value_type - } else { - input_type +use datafusion_common::ScalarValue; +use datafusion_expr::GroupsAccumulator; +use datafusion_expr::{ + function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Signature, Volatility, +}; +use std::ops::Deref; + +fn get_min_max_result_type(input_types: &[DataType]) -> Result> { + // make sure that the input types only has one element. + assert_eq!(input_types.len(), 1); + // min and max support the dictionary data type + // unpack the dictionary to get the value + match &input_types[0] { + DataType::Dictionary(_, dict_value_type) => { + // TODO add checker, if the value type is complex data type + Ok(vec![dict_value_type.deref().clone()]) + } + // TODO add checker for datatype which min and max supported + // For example, the `Struct` and `Map` type are not supported in the MIN and MAX function + _ => Ok(input_types.to_vec()), } } -/// MAX aggregate expression -#[derive(Debug, Clone)] +// MAX aggregate UDF +#[derive(Debug)] pub struct Max { - name: String, - data_type: DataType, - nullable: bool, - expr: Arc, + aliases: Vec, + signature: Signature, } impl Max { - /// Create a new MAX aggregate function - pub fn new( - expr: Arc, - name: impl Into, - data_type: DataType, - ) -> Self { + pub fn new() -> Self { Self { - name: name.into(), - expr, - data_type: min_max_aggregate_data_type(data_type), - nullable: true, + aliases: vec!["max".to_owned()], + signature: Signature::user_defined(Volatility::Immutable), } } } + +impl Default for Max { + fn default() -> Self { + Self::new() + } +} /// Creates a [`PrimitiveGroupsAccumulator`] for computing `MAX` /// the specified [`ArrowPrimitiveType`]. /// /// [`ArrowPrimitiveType`]: arrow::datatypes::ArrowPrimitiveType macro_rules! instantiate_max_accumulator { - ($SELF:expr, $NATIVE:ident, $PRIMTYPE:ident) => {{ + ($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{ Ok(Box::new( - PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new( - &$SELF.data_type, - |cur, new| { - if *cur < new { - *cur = new - } - }, - ) + PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new($DATA_TYPE, |cur, new| { + if *cur < new { + *cur = new + } + }) // Initialize each accumulator to $NATIVE::MIN .with_starting_value($NATIVE::MIN), )) @@ -124,60 +127,48 @@ macro_rules! instantiate_max_accumulator { /// /// [`ArrowPrimitiveType`]: arrow::datatypes::ArrowPrimitiveType macro_rules! 
instantiate_min_accumulator { - ($SELF:expr, $NATIVE:ident, $PRIMTYPE:ident) => {{ + ($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{ Ok(Box::new( - PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new( - &$SELF.data_type, - |cur, new| { - if *cur > new { - *cur = new - } - }, - ) + PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new(&$DATA_TYPE, |cur, new| { + if *cur > new { + *cur = new + } + }) // Initialize each accumulator to $NATIVE::MAX .with_starting_value($NATIVE::MAX), )) }}; } -impl AggregateExpr for Max { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { +impl AggregateUDFImpl for Max { + fn as_any(&self) -> &dyn std::any::Any { self } - fn field(&self) -> Result { - Ok(Field::new( - &self.name, - self.data_type.clone(), - self.nullable, - )) + fn name(&self) -> &str { + "MAX" } - fn state_fields(&self) -> Result> { - Ok(vec![Field::new( - format_state_name(&self.name, "max"), - self.data_type.clone(), - true, - )]) + fn signature(&self) -> &Signature { + &self.signature } - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].to_owned()) } - fn create_accumulator(&self) -> Result> { - Ok(Box::new(MaxAccumulator::try_new(&self.data_type)?)) + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + Ok(Box::new(MaxAccumulator::try_new(acc_args.data_type)?)) } - fn name(&self) -> &str { - &self.name + fn aliases(&self) -> &[String] { + &self.aliases } - fn groups_accumulator_supported(&self) -> bool { + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { use DataType::*; matches!( - self.data_type, + args.data_type, Int8 | Int16 | Int32 | Int64 @@ -197,97 +188,92 @@ impl AggregateExpr for Max { ) } - fn create_groups_accumulator(&self) -> Result> { + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { use DataType::*; use TimeUnit::*; - - match self.data_type { - Int8 => instantiate_max_accumulator!(self, i8, Int8Type), - Int16 => instantiate_max_accumulator!(self, i16, Int16Type), - Int32 => instantiate_max_accumulator!(self, i32, Int32Type), - Int64 => instantiate_max_accumulator!(self, i64, Int64Type), - UInt8 => instantiate_max_accumulator!(self, u8, UInt8Type), - UInt16 => instantiate_max_accumulator!(self, u16, UInt16Type), - UInt32 => instantiate_max_accumulator!(self, u32, UInt32Type), - UInt64 => instantiate_max_accumulator!(self, u64, UInt64Type), + let data_type = args.data_type; + match data_type { + Int8 => instantiate_max_accumulator!(data_type, i8, Int8Type), + Int16 => instantiate_max_accumulator!(data_type, i16, Int16Type), + Int32 => instantiate_max_accumulator!(data_type, i32, Int32Type), + Int64 => instantiate_max_accumulator!(data_type, i64, Int64Type), + UInt8 => instantiate_max_accumulator!(data_type, u8, UInt8Type), + UInt16 => instantiate_max_accumulator!(data_type, u16, UInt16Type), + UInt32 => instantiate_max_accumulator!(data_type, u32, UInt32Type), + UInt64 => instantiate_max_accumulator!(data_type, u64, UInt64Type), Float32 => { - instantiate_max_accumulator!(self, f32, Float32Type) + instantiate_max_accumulator!(data_type, f32, Float32Type) } Float64 => { - instantiate_max_accumulator!(self, f64, Float64Type) + instantiate_max_accumulator!(data_type, f64, Float64Type) } - Date32 => instantiate_max_accumulator!(self, i32, Date32Type), - Date64 => instantiate_max_accumulator!(self, i64, Date64Type), + Date32 => instantiate_max_accumulator!(data_type, i32, 
Date32Type), + Date64 => instantiate_max_accumulator!(data_type, i64, Date64Type), Time32(Second) => { - instantiate_max_accumulator!(self, i32, Time32SecondType) + instantiate_max_accumulator!(data_type, i32, Time32SecondType) } Time32(Millisecond) => { - instantiate_max_accumulator!(self, i32, Time32MillisecondType) + instantiate_max_accumulator!(data_type, i32, Time32MillisecondType) } Time64(Microsecond) => { - instantiate_max_accumulator!(self, i64, Time64MicrosecondType) + instantiate_max_accumulator!(data_type, i64, Time64MicrosecondType) } Time64(Nanosecond) => { - instantiate_max_accumulator!(self, i64, Time64NanosecondType) + instantiate_max_accumulator!(data_type, i64, Time64NanosecondType) } Timestamp(Second, _) => { - instantiate_max_accumulator!(self, i64, TimestampSecondType) + instantiate_max_accumulator!(data_type, i64, TimestampSecondType) } Timestamp(Millisecond, _) => { - instantiate_max_accumulator!(self, i64, TimestampMillisecondType) + instantiate_max_accumulator!(data_type, i64, TimestampMillisecondType) } Timestamp(Microsecond, _) => { - instantiate_max_accumulator!(self, i64, TimestampMicrosecondType) + instantiate_max_accumulator!(data_type, i64, TimestampMicrosecondType) } Timestamp(Nanosecond, _) => { - instantiate_max_accumulator!(self, i64, TimestampNanosecondType) + instantiate_max_accumulator!(data_type, i64, TimestampNanosecondType) } Decimal128(_, _) => { - instantiate_max_accumulator!(self, i128, Decimal128Type) + instantiate_max_accumulator!(data_type, i128, Decimal128Type) } Decimal256(_, _) => { - instantiate_max_accumulator!(self, i256, Decimal256Type) + instantiate_max_accumulator!(data_type, i256, Decimal256Type) } // It would be nice to have a fast implementation for Strings as well // https://github.com/apache/datafusion/issues/6906 // This is only reached if groups_accumulator_supported is out of sync - _ => internal_err!( - "GroupsAccumulator not supported for max({})", - self.data_type - ), + _ => internal_err!("GroupsAccumulator not supported for max({})", data_type), } } - fn reverse_expr(&self) -> Option> { - Some(Arc::new(self.clone())) + fn create_sliding_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(SlidingMaxAccumulator::try_new(args.data_type)?)) } - fn create_sliding_accumulator(&self) -> Result> { - Ok(Box::new(SlidingMaxAccumulator::try_new(&self.data_type)?)) + fn is_descending(&self) -> Option { + Some(true) } - - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - Some((self.field().ok()?, true)) + fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity { + datafusion_expr::utils::AggregateOrderSensitivity::Insensitive } -} -impl PartialEq for Max { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.data_type == x.data_type - && self.nullable == x.nullable - && self.expr.eq(&x.expr) - }) - .unwrap_or(false) + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + get_min_max_result_type(arg_types) + } + fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF { + datafusion_expr::ReversedUDAF::Identical } } -// Statically-typed version of min/max(array) -> ScalarValue for string types. +// Statically-typed version of min/max(array) -> ScalarValue for string types macro_rules! typed_min_max_batch_string { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = downcast_value!($VALUES, $ARRAYTYPE); @@ -296,8 +282,7 @@ macro_rules! 
typed_min_max_batch_string { ScalarValue::$SCALAR(value) }}; } - -// Statically-typed version of min/max(array) -> ScalarValue for binary types. +// Statically-typed version of min/max(array) -> ScalarValue for binay types. macro_rules! typed_min_max_batch_binary { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = downcast_value!($VALUES, $ARRAYTYPE); @@ -545,7 +530,6 @@ macro_rules! typed_min_max { ) }}; } - macro_rules! typed_min_max_float { ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident) => {{ ScalarValue::$SCALAR(match ($VALUE, $DELTA) { @@ -804,16 +788,6 @@ macro_rules! min_max { }}; } -/// the minimum of two scalar values -pub fn min(lhs: &ScalarValue, rhs: &ScalarValue) -> Result { - min_max!(lhs, rhs, min) -} - -/// the maximum of two scalar values -pub fn max(lhs: &ScalarValue, rhs: &ScalarValue) -> Result { - min_max!(lhs, rhs, max) -} - /// An accumulator to compute the maximum value #[derive(Debug)] pub struct MaxAccumulator { @@ -833,7 +807,9 @@ impl Accumulator for MaxAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { let values = &values[0]; let delta = &max_batch(values)?; - self.max = max(&self.max, delta)?; + let new_max: Result = + min_max!(&self.max, delta, max); + self.max = new_max?; Ok(()) } @@ -842,9 +818,8 @@ impl Accumulator for MaxAccumulator { } fn state(&mut self) -> Result> { - Ok(vec![self.max.clone()]) + Ok(vec![self.evaluate()?]) } - fn evaluate(&mut self) -> Result { Ok(self.max.clone()) } @@ -854,11 +829,10 @@ impl Accumulator for MaxAccumulator { } } -/// An accumulator to compute the maximum value #[derive(Debug)] pub struct SlidingMaxAccumulator { max: ScalarValue, - moving_max: moving_min_max::MovingMax, + moving_max: MovingMax, } impl SlidingMaxAccumulator { @@ -866,7 +840,7 @@ impl SlidingMaxAccumulator { pub fn try_new(datatype: &DataType) -> Result { Ok(Self { max: ScalarValue::try_from(datatype)?, - moving_max: moving_min_max::MovingMax::::new(), + moving_max: MovingMax::::new(), }) } } @@ -914,69 +888,56 @@ impl Accumulator for SlidingMaxAccumulator { } } -/// MIN aggregate expression -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Min { - name: String, - data_type: DataType, - nullable: bool, - expr: Arc, + signature: Signature, + aliases: Vec, } impl Min { - /// Create a new MIN aggregate function - pub fn new( - expr: Arc, - name: impl Into, - data_type: DataType, - ) -> Self { + pub fn new() -> Self { Self { - name: name.into(), - expr, - data_type: min_max_aggregate_data_type(data_type), - nullable: true, + signature: Signature::user_defined(Volatility::Immutable), + aliases: vec!["min".to_owned()], } } } -impl AggregateExpr for Min { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { +impl Default for Min { + fn default() -> Self { + Self::new() + } +} + +impl AggregateUDFImpl for Min { + fn as_any(&self) -> &dyn std::any::Any { self } - fn field(&self) -> Result { - Ok(Field::new( - &self.name, - self.data_type.clone(), - self.nullable, - )) + fn name(&self) -> &str { + "MIN" } - fn create_accumulator(&self) -> Result> { - Ok(Box::new(MinAccumulator::try_new(&self.data_type)?)) + fn signature(&self) -> &Signature { + &self.signature } - fn state_fields(&self) -> Result> { - Ok(vec![Field::new( - format_state_name(&self.name, "min"), - self.data_type.clone(), - true, - )]) + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].to_owned()) } - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] + 
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + Ok(Box::new(MinAccumulator::try_new(acc_args.data_type)?)) } - fn name(&self) -> &str { - &self.name + fn aliases(&self) -> &[String] { + &self.aliases } - fn groups_accumulator_supported(&self) -> bool { + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { use DataType::*; matches!( - self.data_type, + args.data_type, Int8 | Int16 | Int32 | Int64 @@ -996,91 +957,92 @@ impl AggregateExpr for Min { ) } - fn create_groups_accumulator(&self) -> Result> { + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { use DataType::*; use TimeUnit::*; - match self.data_type { - Int8 => instantiate_min_accumulator!(self, i8, Int8Type), - Int16 => instantiate_min_accumulator!(self, i16, Int16Type), - Int32 => instantiate_min_accumulator!(self, i32, Int32Type), - Int64 => instantiate_min_accumulator!(self, i64, Int64Type), - UInt8 => instantiate_min_accumulator!(self, u8, UInt8Type), - UInt16 => instantiate_min_accumulator!(self, u16, UInt16Type), - UInt32 => instantiate_min_accumulator!(self, u32, UInt32Type), - UInt64 => instantiate_min_accumulator!(self, u64, UInt64Type), + let data_type = args.data_type; + match data_type { + Int8 => instantiate_min_accumulator!(data_type, i8, Int8Type), + Int16 => instantiate_min_accumulator!(data_type, i16, Int16Type), + Int32 => instantiate_min_accumulator!(data_type, i32, Int32Type), + Int64 => instantiate_min_accumulator!(data_type, i64, Int64Type), + UInt8 => instantiate_min_accumulator!(data_type, u8, UInt8Type), + UInt16 => instantiate_min_accumulator!(data_type, u16, UInt16Type), + UInt32 => instantiate_min_accumulator!(data_type, u32, UInt32Type), + UInt64 => instantiate_min_accumulator!(data_type, u64, UInt64Type), Float32 => { - instantiate_min_accumulator!(self, f32, Float32Type) + instantiate_min_accumulator!(data_type, f32, Float32Type) } Float64 => { - instantiate_min_accumulator!(self, f64, Float64Type) + instantiate_min_accumulator!(data_type, f64, Float64Type) } - Date32 => instantiate_min_accumulator!(self, i32, Date32Type), - Date64 => instantiate_min_accumulator!(self, i64, Date64Type), + Date32 => instantiate_min_accumulator!(data_type, i32, Date32Type), + Date64 => instantiate_min_accumulator!(data_type, i64, Date64Type), Time32(Second) => { - instantiate_min_accumulator!(self, i32, Time32SecondType) + instantiate_min_accumulator!(data_type, i32, Time32SecondType) } Time32(Millisecond) => { - instantiate_min_accumulator!(self, i32, Time32MillisecondType) + instantiate_min_accumulator!(data_type, i32, Time32MillisecondType) } Time64(Microsecond) => { - instantiate_min_accumulator!(self, i64, Time64MicrosecondType) + instantiate_min_accumulator!(data_type, i64, Time64MicrosecondType) } Time64(Nanosecond) => { - instantiate_min_accumulator!(self, i64, Time64NanosecondType) + instantiate_min_accumulator!(data_type, i64, Time64NanosecondType) } Timestamp(Second, _) => { - instantiate_min_accumulator!(self, i64, TimestampSecondType) + instantiate_min_accumulator!(data_type, i64, TimestampSecondType) } Timestamp(Millisecond, _) => { - instantiate_min_accumulator!(self, i64, TimestampMillisecondType) + instantiate_min_accumulator!(data_type, i64, TimestampMillisecondType) } Timestamp(Microsecond, _) => { - instantiate_min_accumulator!(self, i64, TimestampMicrosecondType) + instantiate_min_accumulator!(data_type, i64, TimestampMicrosecondType) } Timestamp(Nanosecond, _) => { - instantiate_min_accumulator!(self, i64, TimestampNanosecondType) + 
instantiate_min_accumulator!(data_type, i64, TimestampNanosecondType) } Decimal128(_, _) => { - instantiate_min_accumulator!(self, i128, Decimal128Type) + instantiate_min_accumulator!(data_type, i128, Decimal128Type) } Decimal256(_, _) => { - instantiate_min_accumulator!(self, i256, Decimal256Type) + instantiate_min_accumulator!(data_type, i256, Decimal256Type) } + + // It would be nice to have a fast implementation for Strings as well + // https://github.com/apache/datafusion/issues/6906 + // This is only reached if groups_accumulator_supported is out of sync - _ => internal_err!( - "GroupsAccumulator not supported for min({})", - self.data_type - ), + _ => internal_err!("GroupsAccumulator not supported for min({})", data_type), } } - fn reverse_expr(&self) -> Option> { - Some(Arc::new(self.clone())) + fn create_sliding_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(SlidingMinAccumulator::try_new(args.data_type)?)) } - fn create_sliding_accumulator(&self) -> Result> { - Ok(Box::new(SlidingMinAccumulator::try_new(&self.data_type)?)) + fn is_descending(&self) -> Option { + Some(false) } - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - Some((self.field().ok()?, false)) + fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity { + datafusion_expr::utils::AggregateOrderSensitivity::Insensitive } -} -impl PartialEq for Min { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.data_type == x.data_type - && self.nullable == x.nullable - && self.expr.eq(&x.expr) - }) - .unwrap_or(false) + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + get_min_max_result_type(arg_types) } -} + fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF { + datafusion_expr::ReversedUDAF::Identical + } +} /// An accumulator to compute the minimum value #[derive(Debug)] pub struct MinAccumulator { @@ -1098,13 +1060,15 @@ impl MinAccumulator { impl Accumulator for MinAccumulator { fn state(&mut self) -> Result> { - Ok(vec![self.min.clone()]) + Ok(vec![self.evaluate()?]) } fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { let values = &values[0]; let delta = &min_batch(values)?; - self.min = min(&self.min, delta)?; + let new_min: Result = + min_max!(&self.min, delta, min); + self.min = new_min?; Ok(()) } @@ -1121,19 +1085,17 @@ impl Accumulator for MinAccumulator { } } -/// An accumulator to compute the minimum value #[derive(Debug)] pub struct SlidingMinAccumulator { min: ScalarValue, - moving_min: moving_min_max::MovingMin, + moving_min: MovingMin, } impl SlidingMinAccumulator { - /// new min accumulator pub fn try_new(datatype: &DataType) -> Result { Ok(Self { min: ScalarValue::try_from(datatype)?, - moving_min: moving_min_max::MovingMin::::new(), + moving_min: MovingMin::::new(), }) } } @@ -1186,12 +1148,278 @@ impl Accumulator for SlidingMinAccumulator { } } +// +// Moving min and moving max +// The implementation is taken from https://github.com/spebern/moving_min_max/blob/master/src/lib.rs. + +// Keep track of the minimum or maximum value in a sliding window. +// +// `moving min max` provides one data structure for keeping track of the +// minimum value and one for keeping track of the maximum value in a sliding +// window. +// +// Each element is stored with the current min/max. One stack to push and another one for pop. 
If pop stack is empty, +// push to this stack all elements popped from first stack while updating their current min/max. Now pop from +// the second stack (MovingMin/Max struct works as a queue). To find the minimum element of the queue, +// look at the smallest/largest two elements of the individual stacks, then take the minimum of those two values. +// +// The complexity of the operations are +// - O(1) for getting the minimum/maximum +// - O(1) for push +// - amortized O(1) for pop + +/// ``` +/// # use datafusion_functions_aggregate::min_max::MovingMin; +/// let mut moving_min = MovingMin::::new(); +/// moving_min.push(2); +/// moving_min.push(1); +/// moving_min.push(3); +/// +/// assert_eq!(moving_min.min(), Some(&1)); +/// assert_eq!(moving_min.pop(), Some(2)); +/// +/// assert_eq!(moving_min.min(), Some(&1)); +/// assert_eq!(moving_min.pop(), Some(1)); +/// +/// assert_eq!(moving_min.min(), Some(&3)); +/// assert_eq!(moving_min.pop(), Some(3)); +/// +/// assert_eq!(moving_min.min(), None); +/// assert_eq!(moving_min.pop(), None); +/// ``` +#[derive(Debug)] +pub struct MovingMin { + push_stack: Vec<(T, T)>, + pop_stack: Vec<(T, T)>, +} + +impl Default for MovingMin { + fn default() -> Self { + Self { + push_stack: Vec::new(), + pop_stack: Vec::new(), + } + } +} + +impl MovingMin { + /// Creates a new `MovingMin` to keep track of the minimum in a sliding + /// window. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Creates a new `MovingMin` to keep track of the minimum in a sliding + /// window with `capacity` allocated slots. + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self { + push_stack: Vec::with_capacity(capacity), + pop_stack: Vec::with_capacity(capacity), + } + } + + /// Returns the minimum of the sliding window or `None` if the window is + /// empty. + #[inline] + pub fn min(&self) -> Option<&T> { + match (self.push_stack.last(), self.pop_stack.last()) { + (None, None) => None, + (Some((_, min)), None) => Some(min), + (None, Some((_, min))) => Some(min), + (Some((_, a)), Some((_, b))) => Some(if a < b { a } else { b }), + } + } + + /// Pushes a new element into the sliding window. + #[inline] + pub fn push(&mut self, val: T) { + self.push_stack.push(match self.push_stack.last() { + Some((_, min)) => { + if val > *min { + (val, min.clone()) + } else { + (val.clone(), val) + } + } + None => (val.clone(), val), + }); + } + + /// Removes and returns the last value of the sliding window. + #[inline] + pub fn pop(&mut self) -> Option { + if self.pop_stack.is_empty() { + match self.push_stack.pop() { + Some((val, _)) => { + let mut last = (val.clone(), val); + self.pop_stack.push(last.clone()); + while let Some((val, _)) = self.push_stack.pop() { + let min = if last.1 < val { + last.1.clone() + } else { + val.clone() + }; + last = (val.clone(), min); + self.pop_stack.push(last.clone()); + } + } + None => return None, + } + } + self.pop_stack.pop().map(|(val, _)| val) + } + + /// Returns the number of elements stored in the sliding window. + #[inline] + pub fn len(&self) -> usize { + self.push_stack.len() + self.pop_stack.len() + } + + /// Returns `true` if the moving window contains no elements. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} +/// ``` +/// # use datafusion_functions_aggregate::min_max::MovingMax; +/// let mut moving_max = MovingMax::::new(); +/// moving_max.push(2); +/// moving_max.push(3); +/// moving_max.push(1); +/// +/// assert_eq!(moving_max.max(), Some(&3)); +/// assert_eq!(moving_max.pop(), Some(2)); +/// +/// assert_eq!(moving_max.max(), Some(&3)); +/// assert_eq!(moving_max.pop(), Some(3)); +/// +/// assert_eq!(moving_max.max(), Some(&1)); +/// assert_eq!(moving_max.pop(), Some(1)); +/// +/// assert_eq!(moving_max.max(), None); +/// assert_eq!(moving_max.pop(), None); +/// ``` +#[derive(Debug)] +pub struct MovingMax { + push_stack: Vec<(T, T)>, + pop_stack: Vec<(T, T)>, +} + +impl Default for MovingMax { + fn default() -> Self { + Self { + push_stack: Vec::new(), + pop_stack: Vec::new(), + } + } +} + +impl MovingMax { + /// Creates a new `MovingMax` to keep track of the maximum in a sliding window. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Creates a new `MovingMax` to keep track of the maximum in a sliding window with + /// `capacity` allocated slots. + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self { + push_stack: Vec::with_capacity(capacity), + pop_stack: Vec::with_capacity(capacity), + } + } + + /// Returns the maximum of the sliding window or `None` if the window is empty. + #[inline] + pub fn max(&self) -> Option<&T> { + match (self.push_stack.last(), self.pop_stack.last()) { + (None, None) => None, + (Some((_, max)), None) => Some(max), + (None, Some((_, max))) => Some(max), + (Some((_, a)), Some((_, b))) => Some(if a > b { a } else { b }), + } + } + + /// Pushes a new element into the sliding window. + #[inline] + pub fn push(&mut self, val: T) { + self.push_stack.push(match self.push_stack.last() { + Some((_, max)) => { + if val < *max { + (val, max.clone()) + } else { + (val.clone(), val) + } + } + None => (val.clone(), val), + }); + } + + /// Removes and returns the last value of the sliding window. + #[inline] + pub fn pop(&mut self) -> Option { + if self.pop_stack.is_empty() { + match self.push_stack.pop() { + Some((val, _)) => { + let mut last = (val.clone(), val); + self.pop_stack.push(last.clone()); + while let Some((val, _)) = self.push_stack.pop() { + let max = if last.1 > val { + last.1.clone() + } else { + val.clone() + }; + last = (val.clone(), max); + self.pop_stack.push(last.clone()); + } + } + None => return None, + } + } + self.pop_stack.pop().map(|(val, _)| val) + } + + /// Returns the number of elements stored in the sliding window. + #[inline] + pub fn len(&self) -> usize { + self.push_stack.len() + self.pop_stack.len() + } + + /// Returns `true` if the moving window contains no elements. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +make_udaf_expr_and_func!( + Max, + max, + expression, + "Returns the maximum of a group of values.", + max_udaf +); + +make_udaf_expr_and_func!( + Min, + min, + expression, + "Returns the minimum of a group of values.", + min_udaf +); + #[cfg(test)] mod tests { use super::*; use arrow::datatypes::{ IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType, }; + use std::sync::Arc; #[test] fn interval_min_max() { @@ -1324,4 +1552,100 @@ mod tests { check(&mut max(), &[&[zero], &[neg_inf]], zero); check(&mut max(), &[&[zero, neg_inf]], zero); } + + use datafusion_common::Result; + use rand::Rng; + + fn get_random_vec_i32(len: usize) -> Vec { + let mut rng = rand::thread_rng(); + let mut input = Vec::with_capacity(len); + for _i in 0..len { + input.push(rng.gen_range(0..100)); + } + input + } + + fn moving_min_i32(len: usize, n_sliding_window: usize) -> Result<()> { + let data = get_random_vec_i32(len); + let mut expected = Vec::with_capacity(len); + let mut moving_min = MovingMin::::new(); + let mut res = Vec::with_capacity(len); + for i in 0..len { + let start = i.saturating_sub(n_sliding_window); + expected.push(*data[start..i + 1].iter().min().unwrap()); + + moving_min.push(data[i]); + if i > n_sliding_window { + moving_min.pop(); + } + res.push(*moving_min.min().unwrap()); + } + assert_eq!(res, expected); + Ok(()) + } + + fn moving_max_i32(len: usize, n_sliding_window: usize) -> Result<()> { + let data = get_random_vec_i32(len); + let mut expected = Vec::with_capacity(len); + let mut moving_max = MovingMax::::new(); + let mut res = Vec::with_capacity(len); + for i in 0..len { + let start = i.saturating_sub(n_sliding_window); + expected.push(*data[start..i + 1].iter().max().unwrap()); + + moving_max.push(data[i]); + if i > n_sliding_window { + moving_max.pop(); + } + res.push(*moving_max.max().unwrap()); + } + assert_eq!(res, expected); + Ok(()) + } + + #[test] + fn moving_min_tests() -> Result<()> { + moving_min_i32(100, 10)?; + moving_min_i32(100, 20)?; + moving_min_i32(100, 50)?; + moving_min_i32(100, 100)?; + Ok(()) + } + + #[test] + fn moving_max_tests() -> Result<()> { + moving_max_i32(100, 10)?; + moving_max_i32(100, 20)?; + moving_max_i32(100, 50)?; + moving_max_i32(100, 100)?; + Ok(()) + } + + #[test] + fn test_min_max_coerce_types() { + // the coerced types is same with input types + let funs: Vec> = + vec![Box::new(Min::new()), Box::new(Max::new())]; + let input_types = vec![ + vec![DataType::Int32], + vec![DataType::Decimal128(10, 2)], + vec![DataType::Decimal256(1, 1)], + vec![DataType::Utf8], + ]; + for fun in funs { + for input_type in &input_types { + let result = fun.coerce_types(input_type); + assert_eq!(*input_type, result.unwrap()); + } + } + } + + #[test] + fn test_get_min_max_return_type_coerce_dictionary() -> Result<()> { + let data_type = + DataType::Dictionary(Box::new(DataType::Utf8), Box::new(DataType::Int32)); + let result = get_min_max_result_type(&[data_type])?; + assert_eq!(result, vec![DataType::Int32]); + Ok(()) + } } diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 97c54cc77bebd..fee3e83a0d656 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -171,9 +171,6 @@ impl ExprPlanner for FieldAccessPlanner { } fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool { - if let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def { - return 
udf.name() == "array_agg"; - } - - false + let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def; + return udf.name() == "array_agg"; } diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 338268e299da7..6f832966671c7 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -103,11 +103,11 @@ mod tests { use datafusion_expr::expr::Sort; use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ - col, exists, expr, in_subquery, logical_plan::LogicalPlanBuilder, max, - out_ref_col, scalar_subquery, wildcard, WindowFrame, WindowFrameBound, - WindowFrameUnits, + col, exists, expr, in_subquery, logical_plan::LogicalPlanBuilder, out_ref_col, + scalar_subquery, wildcard, WindowFrame, WindowFrameBound, WindowFrameUnits, }; use datafusion_functions_aggregate::count::count_udaf; + use datafusion_functions_aggregate::expr_fn::max; use std::sync::Arc; use datafusion_functions_aggregate::expr_fn::{count, sum}; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 75dbb4d1adcd3..bcd1cbcce23e3 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -47,9 +47,8 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - type_coercion, AggregateFunction, AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, - LogicalPlan, Operator, ScalarUDF, Signature, WindowFrame, WindowFrameBound, - WindowFrameUnits, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, ScalarUDF, + WindowFrame, WindowFrameBound, WindowFrameUnits, }; use crate::analyzer::AnalyzerRule; @@ -401,24 +400,6 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { order_by, null_treatment, }) => match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => { - let new_expr = coerce_agg_exprs_for_signature( - &fun, - args, - self.schema, - &fun.signature(), - )?; - Ok(Transformed::yes(Expr::AggregateFunction( - expr::AggregateFunction::new( - fun, - new_expr, - distinct, - filter, - order_by, - null_treatment, - ), - ))) - } AggregateFunctionDefinition::UDF(fun) => { let new_expr = coerce_arguments_for_signature_with_aggregate_udf( args, @@ -449,14 +430,6 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { coerce_window_frame(window_frame, self.schema, &order_by)?; let args = match &fun { - expr::WindowFunctionDefinition::AggregateFunction(fun) => { - coerce_agg_exprs_for_signature( - fun, - args, - self.schema, - &fun.signature(), - )? - } expr::WindowFunctionDefinition::AggregateUDF(udf) => { coerce_arguments_for_signature_with_aggregate_udf( args, @@ -692,33 +665,6 @@ fn coerce_arguments_for_fun( } } -/// Returns the coerced exprs for each `input_exprs`. -/// Get the coerced data type from `aggregate_rule::coerce_types` and add `try_cast` if the -/// data type of `input_exprs` need to be coerced. 
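// A hedged sketch of what replaces the helper being removed below: argument
// coercion for aggregates is now asked of the UDAF itself (its `coerce_types`,
// reached via `coerce_arguments_for_signature_with_aggregate_udf` above), so a
// caller only builds the expression. The wrapper function name is illustrative.
fn sketch_udaf_coercion_path() -> datafusion_expr::Expr {
    use datafusion_expr::{col, lit};
    use datafusion_functions_aggregate::expr_fn::max;

    // TypeCoercion casts this argument according to the `max` UDAF's own
    // `coerce_types`, not the removed built-in signature tables.
    max(col("b").eq(lit(true)))
}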
-fn coerce_agg_exprs_for_signature( - agg_fun: &AggregateFunction, - input_exprs: Vec, - schema: &DFSchema, - signature: &Signature, -) -> Result> { - if input_exprs.is_empty() { - return Ok(input_exprs); - } - let current_types = input_exprs - .iter() - .map(|e| e.get_type(schema)) - .collect::>>()?; - - let coerced_types = - type_coercion::aggregates::coerce_types(agg_fun, ¤t_types, signature)?; - - input_exprs - .into_iter() - .enumerate() - .map(|(i, expr)| expr.cast_to(&coerced_types[i], schema)) - .collect() -} - fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { // Given expressions like: // diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index c998e8442548c..6dbf1641bd7cb 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -436,9 +436,6 @@ fn agg_exprs_evaluation_result_on_empty_batch( Expr::AggregateFunction(expr::AggregateFunction { func_def, .. }) => match func_def { - AggregateFunctionDefinition::BuiltIn(_fun) => { - Transformed::yes(Expr::Literal(ScalarValue::Null)) - } AggregateFunctionDefinition::UDF(fun) => { if fun.name() == "count" { Transformed::yes(Expr::Literal(ScalarValue::Int64(Some( diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 16abf93f38073..31d59da13323f 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -814,13 +814,13 @@ mod tests { expr::{self, Cast}, lit, logical_plan::{builder::LogicalPlanBuilder, table_scan}, - max, min, not, try_cast, when, AggregateFunction, BinaryExpr, Expr, Extension, - Like, LogicalPlan, Operator, Projection, UserDefinedLogicalNodeCore, - WindowFunctionDefinition, + not, try_cast, when, BinaryExpr, Expr, Extension, Like, LogicalPlan, Operator, + Projection, UserDefinedLogicalNodeCore, WindowFunctionDefinition, }; use datafusion_functions_aggregate::count::count_udaf; - use datafusion_functions_aggregate::expr_fn::count; + use datafusion_functions_aggregate::expr_fn::{count, max, min}; + use datafusion_functions_aggregate::min_max::max_udaf; fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { assert_optimized_plan_eq(Arc::new(OptimizeProjections::new()), plan, expected) @@ -1917,7 +1917,7 @@ mod tests { let table_scan = test_table_scan()?; let max1 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("test.a")], )) .partition_by(vec![col("test.b")]) @@ -1925,7 +1925,7 @@ mod tests { .unwrap(); let max2 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("test.b")], )); let col1 = col(max1.display_name()?); diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 79980f8fc9ec9..d7da3871ee89b 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -321,8 +321,8 @@ mod test { use super::*; use crate::test::*; - - use datafusion_expr::{col, exists, logical_plan::builder::LogicalPlanBuilder, max}; + use datafusion_expr::{col, exists, logical_plan::builder::LogicalPlanBuilder}; + use datafusion_functions_aggregate::expr_fn::max; fn assert_optimized_plan_equal(plan: LogicalPlan, 
expected: &str) -> Result<()> { assert_optimized_plan_eq(Arc::new(PushDownLimit::new()), plan, expected) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 35691847fb8e9..fbec675f6fc48 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -394,7 +394,9 @@ mod tests { use arrow::datatypes::DataType; use datafusion_expr::test::function_stub::sum; - use datafusion_expr::{col, lit, max, min, out_ref_col, scalar_subquery, Between}; + + use datafusion_expr::{col, lit, out_ref_col, scalar_subquery, Between}; + use datafusion_functions_aggregate::min_max::{max, min}; /// Test multiple correlated subqueries #[test] diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index e650d4c09c23f..e44f60d1df220 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -160,6 +160,7 @@ mod tests { ExprSchemable, JoinType, }; use datafusion_expr::{or, BinaryExpr, Cast, Operator}; + use datafusion_functions_aggregate::expr_fn::{max, min}; use crate::test::{assert_fields_eq, test_table_scan_with_name}; use crate::OptimizerContext; @@ -395,10 +396,7 @@ mod tests { .project(vec![col("a"), col("c"), col("b")])? .aggregate( vec![col("a"), col("c")], - vec![ - datafusion_expr::max(col("b").eq(lit(true))), - datafusion_expr::min(col("b")), - ], + vec![max(col("b").eq(lit(true))), min(col("b"))], )? .build()?; diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index d776e6598cbe7..69c1b505727d8 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -28,7 +28,6 @@ use datafusion_common::{ use datafusion_expr::builder::project; use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::{ - aggregate_function::AggregateFunction::{Max, Min}, col, expr::AggregateFunction, logical_plan::{Aggregate, LogicalPlan}, @@ -71,26 +70,6 @@ fn is_single_distinct_agg(aggr_expr: &[Expr]) -> Result { let mut aggregate_count = 0; for expr in aggr_expr { if let Expr::AggregateFunction(AggregateFunction { - func_def: AggregateFunctionDefinition::BuiltIn(fun), - distinct, - args, - filter, - order_by, - null_treatment: _, - }) = expr - { - if filter.is_some() || order_by.is_some() { - return Ok(false); - } - aggregate_count += 1; - if *distinct { - for e in args { - fields_set.insert(e); - } - } else if !matches!(fun, Min | Max) { - return Ok(false); - } - } else if let Expr::AggregateFunction(AggregateFunction { func_def: AggregateFunctionDefinition::UDF(fun), distinct, args, @@ -107,7 +86,10 @@ fn is_single_distinct_agg(aggr_expr: &[Expr]) -> Result { for e in args { fields_set.insert(e); } - } else if fun.name() != "sum" && fun.name() != "MIN" && fun.name() != "MAX" { + } else if fun.name() != "sum" + && fun.name().to_lowercase() != "min" + && fun.name().to_lowercase() != "max" + { return Ok(false); } } else { @@ -173,6 +155,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { // // First aggregate(from bottom) refers to `test.a` column. // Second aggregate refers to the `group_alias_0` column, Which is a valid field in the first aggregate. 
+ // If we were to write plan above as below without alias // // Aggregate: groupBy=[[test.a + Int32(1)]], aggr=[[count(alias1)]] [group_alias_0:Int32, count(alias1):Int64;N]\ @@ -200,55 +183,6 @@ impl OptimizerRule for SingleDistinctToGroupBy { let outer_aggr_exprs = aggr_expr .into_iter() .map(|aggr_expr| match aggr_expr { - Expr::AggregateFunction(AggregateFunction { - func_def: AggregateFunctionDefinition::BuiltIn(fun), - mut args, - distinct, - .. - }) => { - if distinct { - if args.len() != 1 { - return internal_err!("DISTINCT aggregate should have exactly one argument"); - } - let arg = args.swap_remove(0); - - if group_fields_set.insert(arg.display_name()?) { - inner_group_exprs - .push(arg.alias(SINGLE_DISTINCT_ALIAS)); - } - Ok(Expr::AggregateFunction(AggregateFunction::new( - fun, - vec![col(SINGLE_DISTINCT_ALIAS)], - false, // intentional to remove distinct here - None, - None, - None, - ))) - // if the aggregate function is not distinct, we need to rewrite it like two phase aggregation - } else { - index += 1; - let alias_str = format!("alias{}", index); - inner_aggr_exprs.push( - Expr::AggregateFunction(AggregateFunction::new( - fun.clone(), - args, - false, - None, - None, - None, - )) - .alias(&alias_str), - ); - Ok(Expr::AggregateFunction(AggregateFunction::new( - fun, - vec![col(&alias_str)], - false, - None, - None, - None, - ))) - } - } Expr::AggregateFunction(AggregateFunction { func_def: AggregateFunctionDefinition::UDF(udf), mut args, @@ -355,13 +289,23 @@ mod tests { use crate::test::*; use datafusion_expr::expr::{self, GroupingSet}; use datafusion_expr::ExprFunctionExt; - use datafusion_expr::{ - lit, logical_plan::builder::LogicalPlanBuilder, max, min, AggregateFunction, - }; + use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; use datafusion_functions_aggregate::count::count_udaf; - use datafusion_functions_aggregate::expr_fn::{count, count_distinct, sum}; + use datafusion_functions_aggregate::expr_fn::{count, count_distinct, max, min, sum}; + use datafusion_functions_aggregate::min_max::max_udaf; use datafusion_functions_aggregate::sum::sum_udaf; + fn max_distinct(expr: Expr) -> Expr { + Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new_udf( + max_udaf(), + vec![expr], + true, + None, + None, + None, + )) + } + fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { assert_optimized_plan_eq_display_indent( Arc::new(SingleDistinctToGroupBy::new()), @@ -520,17 +464,7 @@ mod tests { let plan = LogicalPlanBuilder::from(table_scan) .aggregate( vec![col("a")], - vec![ - count_distinct(col("b")), - Expr::AggregateFunction(expr::AggregateFunction::new( - AggregateFunction::Max, - vec![col("b")], - true, - None, - None, - None, - )), - ], + vec![count_distinct(col("b")), max_distinct(col("b"))], )? .build()?; // Should work @@ -587,14 +521,7 @@ mod tests { vec![ sum(col("c")), count_distinct(col("b")), - Expr::AggregateFunction(expr::AggregateFunction::new( - AggregateFunction::Max, - vec![col("b")], - true, - None, - None, - None, - )), + max_distinct(col("b")), ], )? .build()?; diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs deleted file mode 100644 index bdc41ff0a9bc2..0000000000000 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ /dev/null @@ -1,208 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Declaration of built-in (aggregate) functions. -//! This module contains built-in aggregates' enumeration and metadata. -//! -//! Generally, an aggregate has: -//! * a signature -//! * a return type, that is a function of the incoming argument's types -//! * the computation, that must accept each valid signature -//! -//! * Signature: see `Signature` -//! * Return type: a function `(arg_types) -> return_type`. E.g. for min, ([f32]) -> f32, ([f64]) -> f64. - -use std::sync::Arc; - -use arrow::datatypes::Schema; - -use datafusion_common::Result; -use datafusion_expr::AggregateFunction; - -use crate::expressions::{self}; -use crate::{AggregateExpr, PhysicalExpr, PhysicalSortExpr}; - -/// Create a physical aggregation expression. -/// This function errors when `input_phy_exprs`' can't be coerced to a valid argument type of the aggregation function. -pub fn create_aggregate_expr( - fun: &AggregateFunction, - distinct: bool, - input_phy_exprs: &[Arc], - _ordering_req: &[PhysicalSortExpr], - input_schema: &Schema, - name: impl Into, - _ignore_nulls: bool, -) -> Result> { - let name = name.into(); - // get the result data type for this aggregate function - let input_phy_types = input_phy_exprs - .iter() - .map(|e| e.data_type(input_schema)) - .collect::>>()?; - let data_type = input_phy_types[0].clone(); - let input_phy_exprs = input_phy_exprs.to_vec(); - Ok(match (fun, distinct) { - (AggregateFunction::Min, _) => Arc::new(expressions::Min::new( - Arc::clone(&input_phy_exprs[0]), - name, - data_type, - )), - (AggregateFunction::Max, _) => Arc::new(expressions::Max::new( - Arc::clone(&input_phy_exprs[0]), - name, - data_type, - )), - }) -} - -#[cfg(test)] -mod tests { - use arrow::datatypes::{DataType, Field}; - - use datafusion_common::plan_err; - use datafusion_expr::{type_coercion, Signature}; - - use crate::expressions::{try_cast, Max, Min}; - - use super::*; - - #[test] - fn test_min_max_expr() -> Result<()> { - let funcs = vec![AggregateFunction::Min, AggregateFunction::Max]; - let data_types = vec![ - DataType::UInt32, - DataType::Int32, - DataType::Float32, - DataType::Float64, - DataType::Decimal128(10, 2), - DataType::Utf8, - ]; - for fun in funcs { - for data_type in &data_types { - let input_schema = - Schema::new(vec![Field::new("c1", data_type.clone(), true)]); - let input_phy_exprs: Vec> = vec![Arc::new( - expressions::Column::new_with_schema("c1", &input_schema).unwrap(), - )]; - let result_agg_phy_exprs = create_physical_agg_expr_for_test( - &fun, - false, - &input_phy_exprs[0..1], - &input_schema, - "c1", - )?; - match fun { - AggregateFunction::Min => { - assert!(result_agg_phy_exprs.as_any().is::()); - assert_eq!("c1", result_agg_phy_exprs.name()); - assert_eq!( - Field::new("c1", data_type.clone(), true), - result_agg_phy_exprs.field().unwrap() - ); - } - AggregateFunction::Max => 
{ - assert!(result_agg_phy_exprs.as_any().is::()); - assert_eq!("c1", result_agg_phy_exprs.name()); - assert_eq!( - Field::new("c1", data_type.clone(), true), - result_agg_phy_exprs.field().unwrap() - ); - } - }; - } - } - Ok(()) - } - - #[test] - fn test_min_max() -> Result<()> { - let observed = AggregateFunction::Min.return_type(&[DataType::Utf8], &[true])?; - assert_eq!(DataType::Utf8, observed); - - let observed = AggregateFunction::Max.return_type(&[DataType::Int32], &[true])?; - assert_eq!(DataType::Int32, observed); - - // test decimal for min - let observed = AggregateFunction::Min - .return_type(&[DataType::Decimal128(10, 6)], &[true])?; - assert_eq!(DataType::Decimal128(10, 6), observed); - - // test decimal for max - let observed = AggregateFunction::Max - .return_type(&[DataType::Decimal128(28, 13)], &[true])?; - assert_eq!(DataType::Decimal128(28, 13), observed); - - Ok(()) - } - - // Helper function - // Create aggregate expr with type coercion - fn create_physical_agg_expr_for_test( - fun: &AggregateFunction, - distinct: bool, - input_phy_exprs: &[Arc], - input_schema: &Schema, - name: impl Into, - ) -> Result> { - let name = name.into(); - let coerced_phy_exprs = - coerce_exprs_for_test(fun, input_phy_exprs, input_schema, &fun.signature())?; - if coerced_phy_exprs.is_empty() { - return plan_err!( - "Invalid or wrong number of arguments passed to aggregate: '{name}'" - ); - } - create_aggregate_expr( - fun, - distinct, - &coerced_phy_exprs, - &[], - input_schema, - name, - false, - ) - } - - // Returns the coerced exprs for each `input_exprs`. - // Get the coerced data type from `aggregate_rule::coerce_types` and add `try_cast` if the - // data type of `input_exprs` need to be coerced. - fn coerce_exprs_for_test( - agg_fun: &AggregateFunction, - input_exprs: &[Arc], - schema: &Schema, - signature: &Signature, - ) -> Result>> { - if input_exprs.is_empty() { - return Ok(vec![]); - } - let input_types = input_exprs - .iter() - .map(|e| e.data_type(schema)) - .collect::>>()?; - - // get the coerced data types - let coerced_types = - type_coercion::aggregates::coerce_types(agg_fun, &input_types, signature)?; - - // try cast if need - input_exprs - .iter() - .zip(coerced_types) - .map(|(expr, coerced_type)| try_cast(Arc::clone(expr), schema, coerced_type)) - .collect::>>() - } -} diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs index 1944e2b2d4159..3c0f3a28fedbc 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs +++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs @@ -25,7 +25,3 @@ pub(crate) mod accumulate { } pub use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::NullState; - -pub(crate) mod prim_op { - pub use datafusion_physical_expr_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; -} diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs index 264c485130505..0760986a87c6d 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -15,12 +15,9 @@ // specific language governing permissions and limitations // under the License. 
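// A hedged sketch of the call-site migration implied by the removals below and
// the deleted build_in.rs above; the wrapper function and column name are
// illustrative, while the expression shapes follow the test updates earlier in
// this diff.
fn sketch_min_max_call_sites() -> Vec<datafusion_expr::Expr> {
    use datafusion_expr::{col, expr, Expr, WindowFunctionDefinition};
    use datafusion_functions_aggregate::expr_fn::{max, min};
    use datafusion_functions_aggregate::min_max::max_udaf;

    // Logical aggregate expressions now come from the expr_fn re-exports...
    let mut exprs = vec![max(col("b")), min(col("b"))];
    // ...and window call sites hold the UDAF handle instead of the removed
    // AggregateFunction::Max enum variant.
    exprs.push(Expr::WindowFunction(expr::WindowFunction::new(
        WindowFunctionDefinition::AggregateUDF(max_udaf()),
        vec![col("b")],
    )));
    exprs
}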
-#[macro_use] -pub(crate) mod min_max; pub(crate) mod groups_accumulator; pub(crate) mod stats; -pub mod build_in; pub mod moving_min_max; pub mod utils { pub use datafusion_physical_expr_common::aggregate::utils::{ diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 7cbe4e7968445..cbb697b5f3041 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -31,11 +31,6 @@ mod try_cast; mod unknown_column; /// Module with some convenient methods used in expression building -pub mod helpers { - pub use crate::aggregate::min_max::{max, min}; -} -pub use crate::aggregate::build_in::create_aggregate_expr; -pub use crate::aggregate::min_max::{Max, MaxAccumulator, Min, MinAccumulator}; pub use crate::aggregate::stats::StatsType; pub use crate::window::cume_dist::{cume_dist, CumeDist}; pub use crate::window::lead_lag::{lag, lead, WindowShift}; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index d1152038eb2a2..43f9f98283bb1 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -55,9 +55,6 @@ mod row_hash; mod topk; mod topk_stream; -pub use datafusion_expr::AggregateFunction; -pub use datafusion_physical_expr::expressions::create_aggregate_expr; - /// Hash aggregate modes #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum AggregateMode { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index a462430ca3819..65cef28efc451 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -21,7 +21,6 @@ use std::borrow::Borrow; use std::sync::Arc; use crate::{ - aggregates, expressions::{ cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, PhysicalSortExpr, RowNumber, @@ -104,23 +103,6 @@ pub fn create_window_expr( ignore_nulls: bool, ) -> Result> { Ok(match fun { - WindowFunctionDefinition::AggregateFunction(fun) => { - let aggregate = aggregates::create_aggregate_expr( - fun, - false, - args, - &[], - input_schema, - name, - ignore_nulls, - )?; - window_expr_from_aggregate_expr( - partition_by, - order_by, - window_frame, - aggregate, - ) - } WindowFunctionDefinition::BuiltInWindowFunction(fun) => { Arc::new(BuiltInWindowExpr::new( create_built_in_window_expr(fun, args, input_schema, name, ignore_nulls)?, diff --git a/datafusion/proto/gen/src/main.rs b/datafusion/proto/gen/src/main.rs index d38a41a01ac23..d3b3c92f60654 100644 --- a/datafusion/proto/gen/src/main.rs +++ b/datafusion/proto/gen/src/main.rs @@ -33,6 +33,7 @@ fn main() -> Result<(), String> { .file_descriptor_set_path(&descriptor_path) .out_dir(out_dir) .compile_well_known_types() + .protoc_arg("--experimental_allow_proto3_optional") .extern_path(".google.protobuf", "::pbjson_types") .compile_protos(&[proto_path], &["proto"]) .map_err(|e| format!("protobuf compilation failed: {e}"))?; @@ -52,7 +53,11 @@ fn main() -> Result<(), String> { let prost = proto_dir.join("src/datafusion.rs"); let pbjson = proto_dir.join("src/datafusion.serde.rs"); let common_path = proto_dir.join("src/datafusion_common.rs"); - + println!( + "Copying {} to {}", + prost.clone().display(), + proto_dir.join("src/generated/prost.rs").display() + ); std::fs::copy(prost, proto_dir.join("src/generated/prost.rs")).unwrap(); std::fs::copy(pbjson, proto_dir.join("src/generated/pbjson.rs")).unwrap(); 
std::fs::copy( diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 4c90297263c45..819130b08e861 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -311,8 +311,6 @@ message LogicalExprNode { // binary expressions BinaryExprNode binary_expr = 4; - // aggregate expressions - AggregateExprNode aggregate_expr = 5; // null checks IsNull is_null_expr = 6; @@ -466,51 +464,6 @@ message InListNode { bool negated = 3; } -enum AggregateFunction { - MIN = 0; - MAX = 1; - // SUM = 2; - // AVG = 3; - // COUNT = 4; - // APPROX_DISTINCT = 5; - // ARRAY_AGG = 6; - // VARIANCE = 7; - // VARIANCE_POP = 8; - // COVARIANCE = 9; - // COVARIANCE_POP = 10; - // STDDEV = 11; - // STDDEV_POP = 12; - // CORRELATION = 13; - // APPROX_PERCENTILE_CONT = 14; - // APPROX_MEDIAN = 15; - // APPROX_PERCENTILE_CONT_WITH_WEIGHT = 16; - // GROUPING = 17; - // MEDIAN = 18; - // BIT_AND = 19; - // BIT_OR = 20; - // BIT_XOR = 21; - // BOOL_AND = 22; - // BOOL_OR = 23; - // REGR_SLOPE = 26; - // REGR_INTERCEPT = 27; - // REGR_COUNT = 28; - // REGR_R2 = 29; - // REGR_AVGX = 30; - // REGR_AVGY = 31; - // REGR_SXX = 32; - // REGR_SYY = 33; - // REGR_SXY = 34; - // STRING_AGG = 35; - // NTH_VALUE_AGG = 36; -} - -message AggregateExprNode { - AggregateFunction aggr_function = 1; - repeated LogicalExprNode expr = 2; - bool distinct = 3; - LogicalExprNode filter = 4; - repeated LogicalExprNode order_by = 5; -} message AggregateUDFExprNode { string fun_name = 1; @@ -543,7 +496,6 @@ enum BuiltInWindowFunction { message WindowExprNode { oneof window_function { - AggregateFunction aggr_function = 1; BuiltInWindowFunction built_in_function = 2; string udaf = 3; string udwf = 9; @@ -853,7 +805,6 @@ message PhysicalScalarUdfNode { message PhysicalAggregateExprNode { oneof AggregateFunction { - AggregateFunction aggr_function = 1; string user_defined_aggr_function = 4; } repeated PhysicalExprNode expr = 2; @@ -865,7 +816,6 @@ message PhysicalAggregateExprNode { message PhysicalWindowExprNode { oneof window_function { - AggregateFunction aggr_function = 1; BuiltInWindowFunction built_in_function = 2; string user_defined_aggr_function = 3; } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 163a4c044aeb5..521a0d90c1ed6 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -362,240 +362,6 @@ impl<'de> serde::Deserialize<'de> for AggregateExecNode { deserializer.deserialize_struct("datafusion.AggregateExecNode", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for AggregateExprNode { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.aggr_function != 0 { - len += 1; - } - if !self.expr.is_empty() { - len += 1; - } - if self.distinct { - len += 1; - } - if self.filter.is_some() { - len += 1; - } - if !self.order_by.is_empty() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.AggregateExprNode", len)?; - if self.aggr_function != 0 { - let v = AggregateFunction::try_from(self.aggr_function) - .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", self.aggr_function)))?; - struct_ser.serialize_field("aggrFunction", &v)?; - } - if !self.expr.is_empty() { - struct_ser.serialize_field("expr", &self.expr)?; - } - if self.distinct { - struct_ser.serialize_field("distinct", 
&self.distinct)?; - } - if let Some(v) = self.filter.as_ref() { - struct_ser.serialize_field("filter", v)?; - } - if !self.order_by.is_empty() { - struct_ser.serialize_field("orderBy", &self.order_by)?; - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for AggregateExprNode { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "aggr_function", - "aggrFunction", - "expr", - "distinct", - "filter", - "order_by", - "orderBy", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - AggrFunction, - Expr, - Distinct, - Filter, - OrderBy, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), - "expr" => Ok(GeneratedField::Expr), - "distinct" => Ok(GeneratedField::Distinct), - "filter" => Ok(GeneratedField::Filter), - "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = AggregateExprNode; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.AggregateExprNode") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut aggr_function__ = None; - let mut expr__ = None; - let mut distinct__ = None; - let mut filter__ = None; - let mut order_by__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::AggrFunction => { - if aggr_function__.is_some() { - return Err(serde::de::Error::duplicate_field("aggrFunction")); - } - aggr_function__ = Some(map_.next_value::()? 
as i32); - } - GeneratedField::Expr => { - if expr__.is_some() { - return Err(serde::de::Error::duplicate_field("expr")); - } - expr__ = Some(map_.next_value()?); - } - GeneratedField::Distinct => { - if distinct__.is_some() { - return Err(serde::de::Error::duplicate_field("distinct")); - } - distinct__ = Some(map_.next_value()?); - } - GeneratedField::Filter => { - if filter__.is_some() { - return Err(serde::de::Error::duplicate_field("filter")); - } - filter__ = map_.next_value()?; - } - GeneratedField::OrderBy => { - if order_by__.is_some() { - return Err(serde::de::Error::duplicate_field("orderBy")); - } - order_by__ = Some(map_.next_value()?); - } - } - } - Ok(AggregateExprNode { - aggr_function: aggr_function__.unwrap_or_default(), - expr: expr__.unwrap_or_default(), - distinct: distinct__.unwrap_or_default(), - filter: filter__, - order_by: order_by__.unwrap_or_default(), - }) - } - } - deserializer.deserialize_struct("datafusion.AggregateExprNode", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for AggregateFunction { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - let variant = match self { - Self::Min => "MIN", - Self::Max => "MAX", - }; - serializer.serialize_str(variant) - } -} -impl<'de> serde::Deserialize<'de> for AggregateFunction { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "MIN", - "MAX", - ]; - - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = AggregateFunction; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - fn visit_i64(self, v: i64) -> std::result::Result - where - E: serde::de::Error, - { - i32::try_from(v) - .ok() - .and_then(|x| x.try_into().ok()) - .ok_or_else(|| { - serde::de::Error::invalid_value(serde::de::Unexpected::Signed(v), &self) - }) - } - - fn visit_u64(self, v: u64) -> std::result::Result - where - E: serde::de::Error, - { - i32::try_from(v) - .ok() - .and_then(|x| x.try_into().ok()) - .ok_or_else(|| { - serde::de::Error::invalid_value(serde::de::Unexpected::Unsigned(v), &self) - }) - } - - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "MIN" => Ok(AggregateFunction::Min), - "MAX" => Ok(AggregateFunction::Max), - _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), - } - } - } - deserializer.deserialize_any(GeneratedVisitor) - } -} impl serde::Serialize for AggregateMode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -9488,9 +9254,6 @@ impl serde::Serialize for LogicalExprNode { logical_expr_node::ExprType::BinaryExpr(v) => { struct_ser.serialize_field("binaryExpr", v)?; } - logical_expr_node::ExprType::AggregateExpr(v) => { - struct_ser.serialize_field("aggregateExpr", v)?; - } logical_expr_node::ExprType::IsNullExpr(v) => { struct_ser.serialize_field("isNullExpr", v)?; } @@ -9592,8 +9355,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "literal", "binary_expr", "binaryExpr", - "aggregate_expr", - "aggregateExpr", "is_null_expr", "isNullExpr", "is_not_null_expr", @@ -9647,7 +9408,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { Alias, Literal, BinaryExpr, - AggregateExpr, IsNullExpr, IsNotNullExpr, NotExpr, @@ -9701,7 +9461,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { 
"alias" => Ok(GeneratedField::Alias), "literal" => Ok(GeneratedField::Literal), "binaryExpr" | "binary_expr" => Ok(GeneratedField::BinaryExpr), - "aggregateExpr" | "aggregate_expr" => Ok(GeneratedField::AggregateExpr), "isNullExpr" | "is_null_expr" => Ok(GeneratedField::IsNullExpr), "isNotNullExpr" | "is_not_null_expr" => Ok(GeneratedField::IsNotNullExpr), "notExpr" | "not_expr" => Ok(GeneratedField::NotExpr), @@ -9778,13 +9537,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { return Err(serde::de::Error::duplicate_field("binaryExpr")); } expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::BinaryExpr) -; - } - GeneratedField::AggregateExpr => { - if expr_type__.is_some() { - return Err(serde::de::Error::duplicate_field("aggregateExpr")); - } - expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::AggregateExpr) ; } GeneratedField::IsNullExpr => { @@ -12708,11 +12460,6 @@ impl serde::Serialize for PhysicalAggregateExprNode { } if let Some(v) = self.aggregate_function.as_ref() { match v { - physical_aggregate_expr_node::AggregateFunction::AggrFunction(v) => { - let v = AggregateFunction::try_from(*v) - .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; - struct_ser.serialize_field("aggrFunction", &v)?; - } physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(v) => { struct_ser.serialize_field("userDefinedAggrFunction", v)?; } @@ -12736,8 +12483,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "ignoreNulls", "fun_definition", "funDefinition", - "aggr_function", - "aggrFunction", "user_defined_aggr_function", "userDefinedAggrFunction", ]; @@ -12749,7 +12494,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { Distinct, IgnoreNulls, FunDefinition, - AggrFunction, UserDefinedAggrFunction, } impl<'de> serde::Deserialize<'de> for GeneratedField { @@ -12777,7 +12521,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "distinct" => Ok(GeneratedField::Distinct), "ignoreNulls" | "ignore_nulls" => Ok(GeneratedField::IgnoreNulls), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), - "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "userDefinedAggrFunction" | "user_defined_aggr_function" => Ok(GeneratedField::UserDefinedAggrFunction), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } @@ -12838,12 +12581,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } - GeneratedField::AggrFunction => { - if aggregate_function__.is_some() { - return Err(serde::de::Error::duplicate_field("aggrFunction")); - } - aggregate_function__ = map_.next_value::<::std::option::Option>()?.map(|x| physical_aggregate_expr_node::AggregateFunction::AggrFunction(x as i32)); - } GeneratedField::UserDefinedAggrFunction => { if aggregate_function__.is_some() { return Err(serde::de::Error::duplicate_field("userDefinedAggrFunction")); @@ -15948,11 +15685,6 @@ impl serde::Serialize for PhysicalWindowExprNode { } if let Some(v) = self.window_function.as_ref() { match v { - physical_window_expr_node::WindowFunction::AggrFunction(v) => { - let v = AggregateFunction::try_from(*v) - .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; - struct_ser.serialize_field("aggrFunction", &v)?; - } physical_window_expr_node::WindowFunction::BuiltInFunction(v) => { let 
v = BuiltInWindowFunction::try_from(*v) .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; @@ -15983,8 +15715,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { "name", "fun_definition", "funDefinition", - "aggr_function", - "aggrFunction", "built_in_function", "builtInFunction", "user_defined_aggr_function", @@ -15999,7 +15729,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { WindowFrame, Name, FunDefinition, - AggrFunction, BuiltInFunction, UserDefinedAggrFunction, } @@ -16029,7 +15758,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame), "name" => Ok(GeneratedField::Name), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), - "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "builtInFunction" | "built_in_function" => Ok(GeneratedField::BuiltInFunction), "userDefinedAggrFunction" | "user_defined_aggr_function" => Ok(GeneratedField::UserDefinedAggrFunction), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -16098,12 +15826,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalWindowExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } - GeneratedField::AggrFunction => { - if window_function__.is_some() { - return Err(serde::de::Error::duplicate_field("aggrFunction")); - } - window_function__ = map_.next_value::<::std::option::Option>()?.map(|x| physical_window_expr_node::WindowFunction::AggrFunction(x as i32)); - } GeneratedField::BuiltInFunction => { if window_function__.is_some() { return Err(serde::de::Error::duplicate_field("builtInFunction")); @@ -20483,11 +20205,6 @@ impl serde::Serialize for WindowExprNode { } if let Some(v) = self.window_function.as_ref() { match v { - window_expr_node::WindowFunction::AggrFunction(v) => { - let v = AggregateFunction::try_from(*v) - .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; - struct_ser.serialize_field("aggrFunction", &v)?; - } window_expr_node::WindowFunction::BuiltInFunction(v) => { let v = BuiltInWindowFunction::try_from(*v) .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; @@ -20520,8 +20237,6 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "windowFrame", "fun_definition", "funDefinition", - "aggr_function", - "aggrFunction", "built_in_function", "builtInFunction", "udaf", @@ -20535,7 +20250,6 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { OrderBy, WindowFrame, FunDefinition, - AggrFunction, BuiltInFunction, Udaf, Udwf, @@ -20565,7 +20279,6 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), - "aggrFunction" | "aggr_function" => Ok(GeneratedField::AggrFunction), "builtInFunction" | "built_in_function" => Ok(GeneratedField::BuiltInFunction), "udaf" => Ok(GeneratedField::Udaf), "udwf" => Ok(GeneratedField::Udwf), @@ -20628,12 +20341,6 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } - GeneratedField::AggrFunction => { - if window_function__.is_some() { - return Err(serde::de::Error::duplicate_field("aggrFunction")); - } - window_function__ = map_.next_value::<::std::option::Option>()?.map(|x| 
window_expr_node::WindowFunction::AggrFunction(x as i32)); - } GeneratedField::BuiltInFunction => { if window_function__.is_some() { return Err(serde::de::Error::duplicate_field("builtInFunction")); diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 606fe3c1699fc..070c9b31d3d48 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -488,7 +488,7 @@ pub struct SubqueryAliasNode { pub struct LogicalExprNode { #[prost( oneof = "logical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35" + tags = "1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35" )] pub expr_type: ::core::option::Option, } @@ -508,9 +508,6 @@ pub mod logical_expr_node { /// binary expressions #[prost(message, tag = "4")] BinaryExpr(super::BinaryExprNode), - /// aggregate expressions - #[prost(message, tag = "5")] - AggregateExpr(::prost::alloc::boxed::Box), /// null checks #[prost(message, tag = "6")] IsNullExpr(::prost::alloc::boxed::Box), @@ -733,20 +730,6 @@ pub struct InListNode { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateExprNode { - #[prost(enumeration = "AggregateFunction", tag = "1")] - pub aggr_function: i32, - #[prost(message, repeated, tag = "2")] - pub expr: ::prost::alloc::vec::Vec, - #[prost(bool, tag = "3")] - pub distinct: bool, - #[prost(message, optional, boxed, tag = "4")] - pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag = "5")] - pub order_by: ::prost::alloc::vec::Vec, -} -#[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] pub struct AggregateUdfExprNode { #[prost(string, tag = "1")] pub fun_name: ::prost::alloc::string::String, @@ -785,7 +768,7 @@ pub struct WindowExprNode { pub window_frame: ::core::option::Option, #[prost(bytes = "vec", optional, tag = "10")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, - #[prost(oneof = "window_expr_node::WindowFunction", tags = "1, 2, 3, 9")] + #[prost(oneof = "window_expr_node::WindowFunction", tags = "2, 3, 9")] pub window_function: ::core::option::Option, } /// Nested message and enum types in `WindowExprNode`. 
@@ -793,8 +776,6 @@ pub mod window_expr_node { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum WindowFunction { - #[prost(enumeration = "super::AggregateFunction", tag = "1")] - AggrFunction(i32), #[prost(enumeration = "super::BuiltInWindowFunction", tag = "2")] BuiltInFunction(i32), #[prost(string, tag = "3")] @@ -1301,7 +1282,7 @@ pub struct PhysicalAggregateExprNode { pub ignore_nulls: bool, #[prost(bytes = "vec", optional, tag = "7")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, - #[prost(oneof = "physical_aggregate_expr_node::AggregateFunction", tags = "1, 4")] + #[prost(oneof = "physical_aggregate_expr_node::AggregateFunction", tags = "4")] pub aggregate_function: ::core::option::Option< physical_aggregate_expr_node::AggregateFunction, >, @@ -1311,8 +1292,6 @@ pub mod physical_aggregate_expr_node { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum AggregateFunction { - #[prost(enumeration = "super::AggregateFunction", tag = "1")] - AggrFunction(i32), #[prost(string, tag = "4")] UserDefinedAggrFunction(::prost::alloc::string::String), } @@ -1332,7 +1311,7 @@ pub struct PhysicalWindowExprNode { pub name: ::prost::alloc::string::String, #[prost(bytes = "vec", optional, tag = "9")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, - #[prost(oneof = "physical_window_expr_node::WindowFunction", tags = "1, 2, 3")] + #[prost(oneof = "physical_window_expr_node::WindowFunction", tags = "2, 3")] pub window_function: ::core::option::Option< physical_window_expr_node::WindowFunction, >, @@ -1342,8 +1321,6 @@ pub mod physical_window_expr_node { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum WindowFunction { - #[prost(enumeration = "super::AggregateFunction", tag = "1")] - AggrFunction(i32), #[prost(enumeration = "super::BuiltInWindowFunction", tag = "2")] BuiltInFunction(i32), #[prost(string, tag = "3")] @@ -1941,65 +1918,6 @@ pub struct PartitionStats { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] -pub enum AggregateFunction { - Min = 0, - /// SUM = 2; - /// AVG = 3; - /// COUNT = 4; - /// APPROX_DISTINCT = 5; - /// ARRAY_AGG = 6; - /// VARIANCE = 7; - /// VARIANCE_POP = 8; - /// COVARIANCE = 9; - /// COVARIANCE_POP = 10; - /// STDDEV = 11; - /// STDDEV_POP = 12; - /// CORRELATION = 13; - /// APPROX_PERCENTILE_CONT = 14; - /// APPROX_MEDIAN = 15; - /// APPROX_PERCENTILE_CONT_WITH_WEIGHT = 16; - /// GROUPING = 17; - /// MEDIAN = 18; - /// BIT_AND = 19; - /// BIT_OR = 20; - /// BIT_XOR = 21; - /// BOOL_AND = 22; - /// BOOL_OR = 23; - /// REGR_SLOPE = 26; - /// REGR_INTERCEPT = 27; - /// REGR_COUNT = 28; - /// REGR_R2 = 29; - /// REGR_AVGX = 30; - /// REGR_AVGY = 31; - /// REGR_SXX = 32; - /// REGR_SYY = 33; - /// REGR_SXY = 34; - /// STRING_AGG = 35; - /// NTH_VALUE_AGG = 36; - Max = 1, -} -impl AggregateFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - AggregateFunction::Min => "MIN", - AggregateFunction::Max => "MAX", - } - } - /// Creates an enum from field names used in the ProtoBuf definition. 
- pub fn from_str_name(value: &str) -> ::core::option::Option { - match value { - "MIN" => Some(Self::Min), - "MAX" => Some(Self::Max), - _ => None, - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] pub enum BuiltInWindowFunction { RowNumber = 0, Rank = 1, diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 5e9b9af49ae9c..6c4c07428bd3b 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -22,11 +22,13 @@ use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, Result, ScalarValue, TableReference, UnnestOptions, }; +use datafusion_expr::expr::Unnest; +use datafusion_expr::expr::{Alias, Placeholder}; +use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ - expr::{self, Alias, InList, Placeholder, Sort, Unnest, WindowFunction}, + expr::{self, InList, Sort, WindowFunction}, logical_plan::{PlanType, StringifiedPlan}, - AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, Case, Cast, Expr, - ExprFunctionExt, GroupingSet, + Between, BinaryExpr, BuiltInWindowFunction, Case, Cast, Expr, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -136,15 +138,6 @@ impl From<&protobuf::StringifiedPlan> for StringifiedPlan { } } -impl From for AggregateFunction { - fn from(agg_fun: protobuf::AggregateFunction) -> Self { - match agg_fun { - protobuf::AggregateFunction::Min => Self::Min, - protobuf::AggregateFunction::Max => Self::Max, - } - } -} - impl From for BuiltInWindowFunction { fn from(built_in_function: protobuf::BuiltInWindowFunction) -> Self { match built_in_function { @@ -231,12 +224,6 @@ impl From for JoinConstraint { } } -pub fn parse_i32_to_aggregate_function(value: &i32) -> Result { - protobuf::AggregateFunction::try_from(*value) - .map(|a| a.into()) - .map_err(|_| Error::unknown("AggregateFunction", *value)) -} - pub fn parse_expr( proto: &protobuf::LogicalExprNode, registry: &dyn FunctionRegistry, @@ -297,24 +284,6 @@ pub fn parse_expr( // TODO: support proto for null treatment match window_function { - window_expr_node::WindowFunction::AggrFunction(i) => { - let aggr_function = parse_i32_to_aggregate_function(i)?; - - Expr::WindowFunction(WindowFunction::new( - expr::WindowFunctionDefinition::AggregateFunction(aggr_function), - vec![parse_required_expr( - expr.expr.as_deref(), - registry, - "expr", - codec, - )?], - )) - .partition_by(partition_by) - .order_by(order_by) - .window_frame(window_frame) - .build() - .map_err(Error::DataFusionError) - } window_expr_node::WindowFunction::BuiltInFunction(i) => { let built_in_function = protobuf::BuiltInWindowFunction::try_from(*i) .map_err(|_| Error::unknown("BuiltInWindowFunction", *i))? @@ -379,19 +348,6 @@ pub fn parse_expr( } } } - ExprType::AggregateExpr(expr) => { - let fun = parse_i32_to_aggregate_function(&expr.aggr_function)?; - - Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, - parse_exprs(&expr.expr, registry, codec)?, - expr.distinct, - parse_optional_expr(expr.filter.as_deref(), registry, codec)? 
- .map(Box::new), - parse_vec_expr(&expr.order_by, registry, codec)?, - None, - ))) - } ExprType::Alias(alias) => Ok(Expr::Alias(Alias::new( parse_required_expr(alias.expr.as_deref(), registry, "expr", codec)?, alias diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index c2441892e8a89..74d9d61b3a7f3 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -25,9 +25,9 @@ use datafusion_expr::expr::{ InList, Like, Placeholder, ScalarFunction, Sort, Unnest, }; use datafusion_expr::{ - logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction, - BuiltInWindowFunction, Expr, JoinConstraint, JoinType, TryCast, WindowFrame, - WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + logical_plan::PlanType, logical_plan::StringifiedPlan, BuiltInWindowFunction, Expr, + JoinConstraint, JoinType, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, + WindowFunctionDefinition, }; use crate::protobuf::{ @@ -111,15 +111,6 @@ impl From<&StringifiedPlan> for protobuf::StringifiedPlan { } } -impl From<&AggregateFunction> for protobuf::AggregateFunction { - fn from(value: &AggregateFunction) -> Self { - match value { - AggregateFunction::Min => Self::Min, - AggregateFunction::Max => Self::Max, - } - } -} - impl From<&BuiltInWindowFunction> for protobuf::BuiltInWindowFunction { fn from(value: &BuiltInWindowFunction) -> Self { match value { @@ -319,12 +310,6 @@ pub fn serialize_expr( null_treatment: _, }) => { let (window_function, fun_definition) = match fun { - WindowFunctionDefinition::AggregateFunction(fun) => ( - protobuf::window_expr_node::WindowFunction::AggrFunction( - protobuf::AggregateFunction::from(fun).into(), - ), - None, - ), WindowFunctionDefinition::BuiltInWindowFunction(fun) => ( protobuf::window_expr_node::WindowFunction::BuiltInFunction( protobuf::BuiltInWindowFunction::from(fun).into(), @@ -383,29 +368,6 @@ pub fn serialize_expr( ref order_by, null_treatment: _, }) => match func_def { - AggregateFunctionDefinition::BuiltIn(fun) => { - let aggr_function = match fun { - AggregateFunction::Min => protobuf::AggregateFunction::Min, - AggregateFunction::Max => protobuf::AggregateFunction::Max, - }; - - let aggregate_expr = protobuf::AggregateExprNode { - aggr_function: aggr_function.into(), - expr: serialize_exprs(args, codec)?, - distinct: *distinct, - filter: match filter { - Some(e) => Some(Box::new(serialize_expr(e, codec)?)), - None => None, - }, - order_by: match order_by { - Some(e) => serialize_exprs(e, codec)?, - None => vec![], - }, - }; - protobuf::LogicalExprNode { - expr_type: Some(ExprType::AggregateExpr(Box::new(aggregate_expr))), - } - } AggregateFunctionDefinition::UDF(fun) => { let mut buf = Vec::new(); let _ = codec.try_encode_udaf(fun, &mut buf); diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 5ecca51478053..bc0a19336bae4 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -145,15 +145,6 @@ pub fn parse_physical_window_expr( let fun = if let Some(window_func) = proto.window_function.as_ref() { match window_func { - protobuf::physical_window_expr_node::WindowFunction::AggrFunction(n) => { - let f = protobuf::AggregateFunction::try_from(*n).map_err(|_| { - proto_error(format!( - "Received an unknown window aggregate function: {n}" - )) - })?; - - WindowFunctionDefinition::AggregateFunction(f.into()) - 
} protobuf::physical_window_expr_node::WindowFunction::BuiltInFunction(n) => { let f = protobuf::BuiltInWindowFunction::try_from(*n).map_err(|_| { proto_error(format!( diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 1f433ff01d128..fbb9e442980b1 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -35,7 +35,7 @@ use datafusion::datasource::physical_plan::{AvroExec, CsvExec}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; -use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateMode}; +use datafusion::physical_plan::aggregates::AggregateMode; use datafusion::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy}; use datafusion::physical_plan::analyze::AnalyzeExec; use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; @@ -477,30 +477,10 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; - let ordering_req: Vec = agg_node.ordering_req.iter() + let _ordering_req: Vec = agg_node.ordering_req.iter() .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { - AggregateFunction::AggrFunction(i) => { - let aggr_function = protobuf::AggregateFunction::try_from(*i) - .map_err( - |_| { - proto_error(format!( - "Received an unknown aggregate function: {i}" - )) - }, - )?; - - create_aggregate_expr( - &aggr_function.into(), - agg_node.distinct, - input_phy_expr.as_slice(), - &ordering_req, - &physical_schema, - name.to_string(), - agg_node.ignore_nulls, - ) - } AggregateFunction::UserDefinedAggrFunction(udaf_name) => { let agg_udf = match &agg_node.fun_definition { Some(buf) => extension_codec.try_decode_udaf(udaf_name, buf)?, diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 140482b9903ca..57cd22a99ae1b 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -24,8 +24,8 @@ use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, InListExpr, IsNotNullExpr, - IsNullExpr, Literal, Max, Min, NegativeExpr, NotExpr, NthValue, Ntile, Rank, - RankType, RowNumber, TryCastExpr, WindowShift, + IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, Ntile, Rank, RankType, + RowNumber, TryCastExpr, WindowShift, }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; @@ -60,7 +60,7 @@ pub fn serialize_physical_aggr_expr( let name = a.fun().name().to_string(); let mut buf = Vec::new(); codec.try_encode_udaf(a.fun(), &mut buf)?; - return Ok(protobuf::PhysicalExprNode { + Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( protobuf::PhysicalAggregateExprNode { aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), @@ -71,35 +71,15 @@ pub fn 
serialize_physical_aggr_expr( fun_definition: (!buf.is_empty()).then_some(buf) }, )), - }); + }) + } else { + unreachable!("No other types exists besides AggergationFunctionExpr"); } - - let AggrFn { - inner: aggr_function, - distinct, - } = aggr_expr_to_aggr_fn(aggr_expr.as_ref())?; - - Ok(protobuf::PhysicalExprNode { - expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( - protobuf::PhysicalAggregateExprNode { - aggregate_function: Some( - physical_aggregate_expr_node::AggregateFunction::AggrFunction( - aggr_function as i32, - ), - ), - expr: expressions, - ordering_req, - distinct, - ignore_nulls: false, - fun_definition: None, - }, - )), - }) } fn serialize_physical_window_aggr_expr( aggr_expr: &dyn AggregateExpr, - window_frame: &WindowFrame, + _window_frame: &WindowFrame, codec: &dyn PhysicalExtensionCodec, ) -> Result<(physical_window_expr_node::WindowFunction, Option>)> { if let Some(a) = aggr_expr.as_any().downcast_ref::() { @@ -119,23 +99,7 @@ fn serialize_physical_window_aggr_expr( (!buf.is_empty()).then_some(buf), )) } else { - let AggrFn { inner, distinct } = aggr_expr_to_aggr_fn(aggr_expr)?; - if distinct { - return not_impl_err!( - "Distinct aggregate functions not supported in window expressions" - ); - } - - if !window_frame.start_bound.is_unbounded() { - return Err(DataFusionError::Internal(format!( - "Unbounded start bound in WindowFrame = {window_frame}" - ))); - } - - Ok(( - physical_window_expr_node::WindowFunction::AggrFunction(inner as i32), - None, - )) + unreachable!("No other types exists besides AggergationFunctionExpr"); } } @@ -252,29 +216,6 @@ pub fn serialize_physical_window_expr( }) } -struct AggrFn { - inner: protobuf::AggregateFunction, - distinct: bool, -} - -fn aggr_expr_to_aggr_fn(expr: &dyn AggregateExpr) -> Result { - let aggr_expr = expr.as_any(); - - // TODO: remove Min and Max - let inner = if aggr_expr.downcast_ref::().is_some() { - protobuf::AggregateFunction::Min - } else if aggr_expr.downcast_ref::().is_some() { - protobuf::AggregateFunction::Max - } else { - return not_impl_err!("Aggregate function not supported: {expr:?}"); - }; - - Ok(AggrFn { - inner, - distinct: false, - }) -} - pub fn serialize_physical_sort_exprs( sort_exprs: I, codec: &dyn PhysicalExtensionCodec, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index f7ad2b9b6158b..d150c474e88fc 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -42,9 +42,10 @@ use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::count::count_udaf; use datafusion::functions_aggregate::expr_fn::{ approx_median, approx_percentile_cont, approx_percentile_cont_with_weight, count, - count_distinct, covar_pop, covar_samp, first_value, grouping, median, stddev, - stddev_pop, sum, var_pop, var_sample, + count_distinct, covar_pop, covar_samp, first_value, grouping, max, median, min, + stddev, stddev_pop, sum, var_pop, var_sample, }; +use datafusion::functions_aggregate::min_max::max_udaf; use datafusion::functions_nested::map::map; use datafusion::prelude::*; use datafusion::test_util::{TestTableFactory, TestTableProvider}; @@ -61,10 +62,10 @@ use datafusion_expr::expr::{ }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ - Accumulator, AggregateFunction, AggregateUDF, ColumnarValue, ExprFunctionExt, - ExprSchemable, Literal, LogicalPlan, Operator, 
PartitionEvaluator, ScalarUDF, - Signature, TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, WindowUDF, WindowUDFImpl, + Accumulator, AggregateUDF, ColumnarValue, ExprFunctionExt, ExprSchemable, Literal, + LogicalPlan, Operator, PartitionEvaluator, ScalarUDF, Signature, TryCast, Volatility, + WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF, + WindowUDFImpl, }; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::expr_fn::{ @@ -875,7 +876,9 @@ async fn roundtrip_expr_api() -> Result<()> { covar_pop(lit(1.5), lit(2.2)), corr(lit(1.5), lit(2.2)), sum(lit(1)), + max(lit(1)), median(lit(2)), + min(lit(2)), var_sample(lit(2.2)), var_pop(lit(2.2)), stddev(lit(2.2)), @@ -2284,7 +2287,7 @@ fn roundtrip_window() { ); let test_expr4 = Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), + WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("col1")], )) .partition_by(vec![col("col1")]) diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 3ddc122e3de2a..0e2bc9cbb3e22 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -25,8 +25,10 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; +use datafusion_functions_aggregate::min_max::max_udaf; use prost::Message; +use crate::cases::{MyAggregateUDF, MyAggregateUdfNode, MyRegexUdf, MyRegexUdfNode}; use datafusion::arrow::array::ArrayRef; use datafusion::arrow::compute::kernels::sort::SortOptions; use datafusion::arrow::datatypes::{DataType, Field, IntervalUnit, Schema}; @@ -43,7 +45,7 @@ use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; use datafusion::physical_expr::aggregate::utils::down_cast_any_ref; -use datafusion::physical_expr::expressions::{Literal, Max}; +use datafusion::physical_expr::expressions::Literal; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; use datafusion::physical_plan::aggregates::{ @@ -92,8 +94,6 @@ use datafusion_proto::physical_plan::{ }; use datafusion_proto::protobuf; -use crate::cases::{MyAggregateUDF, MyAggregateUdfNode, MyRegexUdf, MyRegexUdfNode}; - /// Perform a serde roundtrip and assert that the string representation of the before and after plans /// are identical. Note that this often isn't sufficient to guarantee that no information is /// lost during serde because the string representation of a plan often only shows a subset of state. 
@@ -909,11 +909,18 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { )), input, )?); + let aggr_expr = AggregateExprBuilder::new( + max_udaf(), + vec![udf_expr.clone() as Arc], + ) + .schema(schema.clone()) + .name("max") + .build()?; let window = Arc::new(WindowAggExec::try_new( vec![Arc::new(PlainAggregateWindowExpr::new( - Arc::new(Max::new(udf_expr.clone(), "max", DataType::Int64)), - &[col("author", &schema)?], + aggr_expr.clone(), + &[col("author", &schema.clone())?], &[], Arc::new(WindowFrame::new(None)), ))], @@ -924,7 +931,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { let aggregate = Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new(vec![], vec![], vec![]), - vec![Arc::new(Max::new(udf_expr, "max", DataType::Int64))], + vec![aggr_expr.clone()], vec![None], window, schema.clone(), diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 2506ef740fded..d16d08b041ae0 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::str::FromStr; - use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::DataType; @@ -26,8 +24,7 @@ use datafusion_common::{ }; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ - expr, AggregateFunction, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, - WindowFunctionDefinition, + expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, }; use datafusion_expr::{ expr::{ScalarFunction, Unnest}, @@ -38,7 +35,6 @@ use sqlparser::ast::{ FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, NullTreatment, ObjectName, OrderByExpr, WindowType, }; - use strum::IntoEnumIterator; /// Suggest a valid function based on an invalid input function name @@ -51,7 +47,6 @@ pub fn suggest_valid_function( // All aggregate functions and builtin window functions let mut funcs = Vec::new(); - funcs.extend(AggregateFunction::iter().map(|func| func.to_string())); funcs.extend(ctx.udaf_names()); funcs.extend(BuiltInWindowFunction::iter().map(|func| func.to_string())); funcs.extend(ctx.udwf_names()); @@ -62,7 +57,6 @@ pub fn suggest_valid_function( let mut funcs = Vec::new(); funcs.extend(ctx.udf_names()); - funcs.extend(AggregateFunction::iter().map(|func| func.to_string())); funcs.extend(ctx.udaf_names()); funcs @@ -324,31 +318,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; if let Ok(fun) = self.find_window_func(&name) { - return match fun { - WindowFunctionDefinition::AggregateFunction(aggregate_fun) => { - let args = - self.function_args_to_expr(args, schema, planner_context)?; - - Expr::WindowFunction(expr::WindowFunction::new( - WindowFunctionDefinition::AggregateFunction(aggregate_fun), - args, - )) - .partition_by(partition_by) - .order_by(order_by) - .window_frame(window_frame) - .null_treatment(null_treatment) - .build() - } - _ => Expr::WindowFunction(expr::WindowFunction::new( - fun, - self.function_args_to_expr(args, schema, planner_context)?, - )) - .partition_by(partition_by) - .order_by(order_by) - .window_frame(window_frame) - .null_treatment(null_treatment) - .build(), - }; + return Expr::WindowFunction(expr::WindowFunction::new( + fun, + self.function_args_to_expr(args, schema, planner_context)?, + )) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .null_treatment(null_treatment) + .build(); } } 
else { // User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function @@ -375,32 +353,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { null_treatment, ))); } - - // next, aggregate built-ins - if let Ok(fun) = AggregateFunction::from_str(&name) { - let order_by = self.order_by_to_sort_expr( - order_by, - schema, - planner_context, - true, - None, - )?; - let order_by = (!order_by.is_empty()).then_some(order_by); - let args = self.function_args_to_expr(args, schema, planner_context)?; - let filter: Option> = filter - .map(|e| self.sql_expr_to_logical_expr(*e, schema, planner_context)) - .transpose()? - .map(Box::new); - - return Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, - args, - distinct, - filter, - order_by, - null_treatment, - ))); - }; } // Could not find the relevant function, so return an error diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index d1ac7a0c96d1e..bae3ec2e2779e 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -20,7 +20,7 @@ use std::vec; use arrow_schema::*; use datafusion_common::{DFSchema, Result, TableReference}; -use datafusion_expr::test::function_stub::{count_udaf, sum_udaf}; +use datafusion_expr::test::function_stub::{count_udaf, max_udaf, min_udaf, sum_udaf}; use datafusion_expr::{col, table_scan}; use datafusion_sql::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_sql::unparser::dialect::{ @@ -381,7 +381,9 @@ fn roundtrip_statement_with_dialect() -> Result<()> { .parse_statement()?; let context = MockContextProvider::default() - .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) + .with_udaf(max_udaf()) + .with_udaf(min_udaf()); let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel .sql_statement_to_plan(statement) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 40a58827b3886..c1b2246e4980f 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -42,7 +42,8 @@ use datafusion_sql::{ use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::{ - approx_median::approx_median_udaf, count::count_udaf, + approx_median::approx_median_udaf, count::count_udaf, min_max::max_udaf, + min_max::min_udaf, }; use datafusion_functions_aggregate::{average::avg_udaf, grouping::grouping_udaf}; use rstest::rstest; @@ -2764,6 +2765,8 @@ fn logical_plan_with_dialect_and_options( .with_udaf(approx_median_udaf()) .with_udaf(count_udaf()) .with_udaf(avg_udaf()) + .with_udaf(min_udaf()) + .with_udaf(max_udaf()) .with_udaf(grouping_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index eebadb239d564..89f2efec66aa8 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -30,8 +30,8 @@ use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::expr::{Exists, InSubquery, Sort}; use datafusion::logical_expr::{ - aggregate_function, expr::find_df_window_func, Aggregate, BinaryExpr, Case, - EmptyRelation, Expr, ExprSchemable, LogicalPlan, Operator, Projection, Values, + expr::find_df_window_func, Aggregate, BinaryExpr, Case, EmptyRelation, Expr, + 
ExprSchemable, LogicalPlan, Operator, Projection, Values, }; use substrait::proto::expression::subquery::set_predicate::PredicateOp; use url::Url; @@ -67,7 +67,6 @@ use datafusion::{ scalar::ScalarValue, }; use std::collections::{HashMap, HashSet}; -use std::str::FromStr; use std::sync::Arc; use substrait::proto::exchange_rel::ExchangeKind; use substrait::proto::expression::literal::user_defined::Val; @@ -1005,11 +1004,6 @@ pub async fn from_substrait_agg_func( Ok(Arc::new(Expr::AggregateFunction( expr::AggregateFunction::new_udf(fun, args, distinct, filter, order_by, None), ))) - } else if let Ok(fun) = aggregate_function::AggregateFunction::from_str(function_name) - { - Ok(Arc::new(Expr::AggregateFunction( - expr::AggregateFunction::new(fun, args, distinct, filter, order_by, None), - ))) } else { not_impl_err!( "Aggregate function {} is not supported: function anchor = {:?}", diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 8263209ffccc7..bd6e0e00491a1 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -48,7 +48,6 @@ use datafusion::common::{ }; use datafusion::common::{substrait_err, DFSchemaRef}; #[allow(unused_imports)] -use datafusion::logical_expr::aggregate_function; use datafusion::logical_expr::expr::{ AggregateFunctionDefinition, Alias, BinaryExpr, Case, Cast, GroupingSet, InList, InSubquery, Sort, WindowFunction, @@ -767,37 +766,6 @@ pub fn to_substrait_agg_measure( match expr { Expr::AggregateFunction(expr::AggregateFunction { func_def, args, distinct, filter, order_by, null_treatment: _, }) => { match func_def { - AggregateFunctionDefinition::BuiltIn (fun) => { - let sorts = if let Some(order_by) = order_by { - order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extensions)).collect::>>()? - } else { - vec![] - }; - let mut arguments: Vec = vec![]; - for arg in args { - arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extensions)?)) }); - } - let function_anchor = extensions.register_function(fun.to_string()); - Ok(Measure { - measure: Some(AggregateFunction { - function_reference: function_anchor, - arguments, - sorts, - output_type: None, - invocation: match distinct { - true => AggregationInvocation::Distinct as i32, - false => AggregationInvocation::All as i32, - }, - phase: AggregationPhase::Unspecified as i32, - args: vec![], - options: vec![], - }), - filter: match filter { - Some(f) => Some(to_substrait_rex(ctx, f, schema, 0, extensions)?), - None => None - } - }) - } AggregateFunctionDefinition::UDF(fun) => { let sorts = if let Some(order_by) = order_by { order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extensions)).collect::>>()? 
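For context on the min/max migration in this commit: the documentation diffs that follow only add a `use datafusion::functions_aggregate::expr_fn::min;` import. A minimal sketch of the post-migration DataFrame usage is shown below; it is an illustration, not part of the patch, and the CSV path and the column names `a`/`b` are assumed for the example. It mirrors the test and doc changes in this commit, where `min`/`max` become UDAF-backed helpers in `datafusion::functions_aggregate::expr_fn` instead of variants of the removed built-in `AggregateFunction` enum.

```rust
// Minimal sketch, assuming an example CSV with columns `a` and `b`
// (path and column names are illustrative, not taken from this patch).
use datafusion::error::Result;
use datafusion::functions_aggregate::expr_fn::{max, min};
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    let df = ctx
        .read_csv("tests/data/example.csv", CsvReadOptions::new())
        .await?;

    // GROUP BY a, computing MIN(b) and MAX(b) via the UDAF-backed helpers.
    let df = df.aggregate(vec![col("a")], vec![min(col("b")), max(col("b"))])?;
    df.show().await?;
    Ok(())
}
```

On the physical-plan side, the same migration appears in the roundtrip tests earlier in this commit, which replace `Max::new(...)` with `AggregateExprBuilder::new(max_udaf(), ...)`.
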
diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md index c3d0b6c2d6881..96be1bb9e2568 100644 --- a/docs/source/user-guide/dataframe.md +++ b/docs/source/user-guide/dataframe.md @@ -53,6 +53,7 @@ Here is a minimal example showing the execution of a query using the DataFrame A ```rust use datafusion::prelude::*; use datafusion::error::Result; +use datafusion::functions_aggregate::expr_fn::min; #[tokio::main] async fn main() -> Result<()> { diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 813dbb1bc02ae..6108315f398aa 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -60,6 +60,7 @@ library guide for more information on the SQL API. ```rust use datafusion::prelude::*; +use datafusion::functions_aggregate::expr_fn::min; #[tokio::main] async fn main() -> datafusion::error::Result<()> { @@ -148,6 +149,7 @@ async fn main() -> datafusion::error::Result<()> { ```rust use datafusion::prelude::*; +use datafusion::functions_aggregate::expr_fn::min; #[tokio::main] async fn main() -> datafusion::error::Result<()> { From a4d41d6a661753f24de0913228f02898ebabc201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Sun, 4 Aug 2024 19:40:39 +0800 Subject: [PATCH 209/357] Support `LogicalPlan` `Debug` differently than `Display` (#11774) * Derive Debug for logical plan nodes * Improve LogicalPlan debug printing * Fix tests * Fix tests * Fix tests --- benchmarks/src/tpch/run.rs | 4 +- datafusion/core/src/dataframe/mod.rs | 14 ++-- .../core/tests/custom_sources_cases/mod.rs | 2 +- .../core/tests/expr_api/simplification.rs | 2 +- .../core/tests/optimizer_integration.rs | 5 +- datafusion/core/tests/sql/explain_analyze.rs | 6 +- .../user_defined/user_defined_aggregates.rs | 2 +- .../user_defined_scalar_functions.rs | 6 +- datafusion/expr/src/logical_plan/builder.rs | 30 ++++---- datafusion/expr/src/logical_plan/ddl.rs | 18 ++--- datafusion/expr/src/logical_plan/dml.rs | 18 ++++- datafusion/expr/src/logical_plan/plan.rs | 17 ++++- datafusion/expr/src/logical_plan/statement.rs | 8 +- .../optimizer/src/common_subexpr_eliminate.rs | 6 +- datafusion/optimizer/src/eliminate_limit.rs | 4 +- .../optimizer/src/optimize_projections/mod.rs | 6 +- datafusion/optimizer/src/push_down_filter.rs | 74 +++++++++---------- .../simplify_expressions/simplify_exprs.rs | 4 +- datafusion/optimizer/src/test/mod.rs | 8 +- .../optimizer/tests/optimizer_integration.rs | 40 +++++----- .../tests/cases/roundtrip_logical_plan.rs | 26 +++---- datafusion/sql/examples/sql.rs | 2 +- datafusion/sql/tests/cases/plan_to_sql.rs | 4 +- datafusion/sql/tests/sql_integration.rs | 11 +-- .../substrait/src/logical_plan/producer.rs | 2 +- .../tests/cases/consumer_integration.rs | 32 ++++---- .../substrait/tests/cases/function_test.rs | 2 +- .../substrait/tests/cases/logical_plans.rs | 6 +- .../tests/cases/roundtrip_logical_plan.rs | 34 ++++----- datafusion/substrait/tests/cases/serialize.rs | 4 +- 30 files changed, 211 insertions(+), 186 deletions(-) diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index a72dfaa0f58ca..ebc5ac0dbd5a2 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -205,12 +205,12 @@ impl RunOpt { let (state, plan) = plan.into_parts(); if debug { - println!("=== Logical plan ===\n{plan:?}\n"); + println!("=== Logical plan ===\n{plan}\n"); } let plan = state.optimize(&plan)?; if debug { - println!("=== Optimized logical plan 
===\n{plan:?}\n"); + println!("=== Optimized logical plan ===\n{plan}\n"); } let physical_plan = state.create_physical_plan(&plan).await?; if debug { diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index cacfa4c6f2aa5..6ec44b33f89e7 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -2553,7 +2553,7 @@ mod tests { \n TableScan: a\ \n Projection: b.c1, b.c2\ \n TableScan: b"; - assert_eq!(expected_plan, format!("{:?}", join.logical_plan())); + assert_eq!(expected_plan, format!("{}", join.logical_plan())); Ok(()) } @@ -2572,7 +2572,7 @@ mod tests { let expected_plan = "CrossJoin:\ \n TableScan: a projection=[c1], full_filters=[Boolean(NULL)]\ \n TableScan: b projection=[c1]"; - assert_eq!(expected_plan, format!("{:?}", join.into_optimized_plan()?)); + assert_eq!(expected_plan, format!("{}", join.into_optimized_plan()?)); // JOIN ON expression must be boolean type let join = left.join_on(right, JoinType::Inner, Some(lit("TRUE")))?; @@ -2914,7 +2914,7 @@ mod tests { \n Inner Join: t1.c1 = t2.c1\ \n TableScan: t1\ \n TableScan: t2", - format!("{:?}", df_with_column.logical_plan()) + format!("{}", df_with_column.logical_plan()) ); assert_eq!( @@ -2927,7 +2927,7 @@ mod tests { \n TableScan: aggregate_test_100 projection=[c1]\ \n SubqueryAlias: t2\ \n TableScan: aggregate_test_100 projection=[c1]", - format!("{:?}", df_with_column.clone().into_optimized_plan()?) + format!("{}", df_with_column.clone().into_optimized_plan()?) ); let df_results = df_with_column.collect().await?; @@ -3109,7 +3109,7 @@ mod tests { \n Inner Join: t1.c1 = t2.c1\ \n TableScan: t1\ \n TableScan: t2", - format!("{:?}", df_renamed.logical_plan()) + format!("{}", df_renamed.logical_plan()) ); assert_eq!("\ @@ -3121,7 +3121,7 @@ mod tests { \n TableScan: aggregate_test_100 projection=[c1, c2, c3]\ \n SubqueryAlias: t2\ \n TableScan: aggregate_test_100 projection=[c1, c2, c3]", - format!("{:?}", df_renamed.clone().into_optimized_plan()?) + format!("{}", df_renamed.clone().into_optimized_plan()?) ); let df_results = df_renamed.collect().await?; @@ -3306,7 +3306,7 @@ mod tests { assert_eq!( "TableScan: ?table? projection=[c2, c3, sum]", - format!("{:?}", cached_df.clone().into_optimized_plan()?) + format!("{}", cached_df.clone().into_optimized_plan()?) 
); let df_results = df.collect().await?; diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index 7c051ffaa7e10..673fafe55b533 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -246,7 +246,7 @@ async fn custom_source_dataframe() -> Result<()> { } let expected = format!("TableScan: {UNNAMED_TABLE} projection=[c2]"); - assert_eq!(format!("{optimized_plan:?}"), expected); + assert_eq!(format!("{optimized_plan}"), expected); let physical_plan = state.create_physical_plan(&optimized_plan).await?; diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 9ce47153ba4a8..b6068e4859df3 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -119,7 +119,7 @@ fn get_optimized_plan_formatted(plan: LogicalPlan, date_time: &DateTime) -> let optimizer = Optimizer::with_rules(vec![Arc::new(SimplifyExpressions::new())]); let optimized_plan = optimizer.optimize(plan, &config, observe).unwrap(); - format!("{optimized_plan:?}") + format!("{optimized_plan}") } // ------------------------------ diff --git a/datafusion/core/tests/optimizer_integration.rs b/datafusion/core/tests/optimizer_integration.rs index 39f745cd33091..f17d13a420607 100644 --- a/datafusion/core/tests/optimizer_integration.rs +++ b/datafusion/core/tests/optimizer_integration.rs @@ -81,14 +81,13 @@ fn timestamp_nano_ts_utc_predicates() { let sql = "SELECT col_int32 FROM test WHERE col_ts_nano_utc < (now() - interval '1 hour')"; - let plan = test_sql(sql).unwrap(); // a scan should have the now()... predicate folded to a single // constant and compared to the column without a cast so it can be // pushed down / pruned let expected = "Projection: test.col_int32\n Filter: test.col_ts_nano_utc < TimestampNanosecond(1666612093000000000, Some(\"+00:00\"))\ \n TableScan: test projection=[col_int32, col_ts_nano_utc]"; - assert_eq!(expected, format!("{plan:?}")); + quick_test(sql, expected); } #[test] @@ -117,7 +116,7 @@ fn concat_ws_literals() -> Result<()> { fn quick_test(sql: &str, expected_plan: &str) { let plan = test_sql(sql).unwrap(); - assert_eq!(expected_plan, format!("{:?}", plan)); + assert_eq!(expected_plan, format!("{}", plan)); } fn test_sql(sql: &str) -> Result { diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index fe4777b04396c..07be00fc35159 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -253,7 +253,7 @@ async fn csv_explain_plans() { // Optimized logical plan let state = ctx.state(); - let msg = format!("Optimizing logical plan for '{sql}': {plan:?}"); + let msg = format!("Optimizing logical plan for '{sql}': {plan}"); let plan = state.optimize(plan).expect(&msg); let optimized_logical_schema = plan.schema(); // Both schema has to be the same @@ -327,7 +327,7 @@ async fn csv_explain_plans() { // Physical plan // Create plan - let msg = format!("Creating physical plan for '{sql}': {plan:?}"); + let msg = format!("Creating physical plan for '{sql}': {plan}"); let plan = state.create_physical_plan(&plan).await.expect(&msg); // // Execute plan @@ -548,7 +548,7 @@ async fn csv_explain_verbose_plans() { // Physical plan // Create plan - let msg = format!("Creating physical plan for '{sql}': {plan:?}"); + let msg = format!("Creating physical plan for '{sql}': 
{plan}"); let plan = state.create_physical_plan(&plan).await.expect(&msg); // // Execute plan diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 96de865b6554a..93550d38021ac 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -413,7 +413,7 @@ async fn test_parameterized_aggregate_udf() -> Result<()> { .build()?; assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Aggregate: groupBy=[[t.text]], aggr=[[geo_mean(t.text) AS a, geo_mean(t.text) AS b]]\n TableScan: t projection=[text]" ); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 9164e89de8f9a..259cce74f2e52 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -139,7 +139,7 @@ async fn scalar_udf() -> Result<()> { .build()?; assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Projection: t.a, t.b, my_add(t.a, t.b)\n TableScan: t projection=[a, b]" ); @@ -393,7 +393,7 @@ async fn udaf_as_window_func() -> Result<()> { TableScan: my_table"#; let dataframe = context.sql(sql).await.unwrap(); - assert_eq!(format!("{:?}", dataframe.logical_plan()), expected); + assert_eq!(format!("{}", dataframe.logical_plan()), expected); Ok(()) } @@ -1124,7 +1124,7 @@ async fn test_parameterized_scalar_udf() -> Result<()> { .build()?; assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: t.text IS NOT NULL\n Filter: regex_udf(t.text) AND regex_udf(t.text)\n TableScan: t projection=[text]" ); diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 736310c7ac0f8..aa2ea4ae1c266 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1749,7 +1749,7 @@ mod tests { \n Filter: employee_csv.state = Utf8(\"CO\")\ \n TableScan: employee_csv projection=[id, state]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1802,7 +1802,7 @@ mod tests { let expected = "Sort: employee_csv.state ASC NULLS FIRST, employee_csv.salary DESC NULLS LAST\ \n TableScan: employee_csv projection=[state, salary]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1822,7 +1822,7 @@ mod tests { \n TableScan: t1\ \n TableScan: t2"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1847,7 +1847,7 @@ mod tests { \n TableScan: employee_csv projection=[state, salary]\ \n TableScan: employee_csv projection=[state, salary]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1876,7 +1876,7 @@ mod tests { \n TableScan: employee_csv projection=[state, salary]\ \n TableScan: employee_csv projection=[state, salary]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1913,7 +1913,7 @@ mod tests { \n Filter: employee_csv.state = Utf8(\"CO\")\ \n TableScan: employee_csv projection=[id, state]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -1940,7 +1940,7 @@ mod tests { \n TableScan: foo\ \n Projection: bar.a\ \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query:?}")); + 
assert_eq!(expected, format!("{outer_query}")); Ok(()) } @@ -1968,7 +1968,7 @@ mod tests { \n TableScan: foo\ \n Projection: bar.a\ \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query:?}")); + assert_eq!(expected, format!("{outer_query}")); Ok(()) } @@ -1994,7 +1994,7 @@ mod tests { \n Projection: foo.b\ \n TableScan: foo\ \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query:?}")); + assert_eq!(expected, format!("{outer_query}")); Ok(()) } @@ -2116,7 +2116,7 @@ mod tests { let expected = "\ Unnest: lists[test_table.strings] structs[]\ \n TableScan: test_table"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); // Check unnested field is a scalar let field = plan.schema().field_with_name(None, "strings").unwrap(); @@ -2130,7 +2130,7 @@ mod tests { let expected = "\ Unnest: lists[] structs[test_table.struct_singular]\ \n TableScan: test_table"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); for field_name in &["a", "b"] { // Check unnested struct field is a scalar @@ -2153,7 +2153,7 @@ mod tests { \n Unnest: lists[test_table.structs] structs[]\ \n Unnest: lists[test_table.strings] structs[]\ \n TableScan: test_table"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); // Check unnested struct list field should be a struct. let field = plan.schema().field_with_name(None, "structs").unwrap(); @@ -2171,7 +2171,7 @@ mod tests { let expected = "\ Unnest: lists[test_table.strings, test_table.structs] structs[test_table.struct_singular]\ \n TableScan: test_table"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); // Unnesting missing column should fail. let plan = nested_table_scan("test_table")?.unnest_column("missing"); @@ -2263,9 +2263,9 @@ mod tests { ])? .build()?; - let plan_expected = format!("{plan:?}"); + let plan_expected = format!("{plan}"); let plan_builder: LogicalPlanBuilder = Arc::new(plan).into(); - assert_eq!(plan_expected, format!("{:?}", plan_builder.plan)); + assert_eq!(plan_expected, format!("{}", plan_builder.plan)); Ok(()) } diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 45ddbafecfd7c..255bf4699b7f5 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -29,7 +29,7 @@ use datafusion_common::{Constraints, DFSchemaRef, SchemaReference, TableReferenc use sqlparser::ast::Ident; /// Various types of DDL (CREATE / DROP) catalog manipulation -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DdlStatement { /// Creates an external table. CreateExternalTable(CreateExternalTable), @@ -179,7 +179,7 @@ impl DdlStatement { } /// Creates an external table. -#[derive(Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateExternalTable { /// The table schema pub schema: DFSchemaRef, @@ -224,7 +224,7 @@ impl Hash for CreateExternalTable { } /// Creates an in memory table. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CreateMemoryTable { /// The table name pub name: TableReference, @@ -241,7 +241,7 @@ pub struct CreateMemoryTable { } /// Creates a view. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CreateView { /// The table name pub name: TableReference, @@ -254,7 +254,7 @@ pub struct CreateView { } /// Creates a catalog (aka "Database"). 
-#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CreateCatalog { /// The catalog name pub catalog_name: String, @@ -265,7 +265,7 @@ pub struct CreateCatalog { } /// Creates a schema. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CreateCatalogSchema { /// The table schema pub schema_name: String, @@ -276,7 +276,7 @@ pub struct CreateCatalogSchema { } /// Drops a table. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DropTable { /// The table name pub name: TableReference, @@ -287,7 +287,7 @@ pub struct DropTable { } /// Drops a view. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DropView { /// The view name pub name: TableReference, @@ -298,7 +298,7 @@ pub struct DropView { } /// Drops a schema -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DropCatalogSchema { /// The schema name pub name: SchemaReference, diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index c9eef9bd34cc0..025bb7b289dce 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -16,7 +16,7 @@ // under the License. use std::collections::HashMap; -use std::fmt::{self, Display}; +use std::fmt::{self, Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -41,6 +41,18 @@ pub struct CopyTo { pub options: HashMap, } +impl Debug for CopyTo { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("CopyTo") + .field("input", &self.input) + .field("output_url", &self.output_url) + .field("partition_by", &self.partition_by) + .field("file_type", &"...") + .field("options", &self.options) + .finish_non_exhaustive() + } +} + // Implement PartialEq manually impl PartialEq for CopyTo { fn eq(&self, other: &Self) -> bool { @@ -61,7 +73,7 @@ impl Hash for CopyTo { /// The operator that modifies the content of a database (adapted from /// substrait WriteRel) -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DmlStatement { /// The table name pub table_name: TableReference, @@ -100,7 +112,7 @@ impl DmlStatement { } } -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum WriteOp { InsertOverwrite, InsertInto, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 6bea1ad948a16..02176a506a251 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -187,7 +187,7 @@ pub use datafusion_common::{JoinConstraint, JoinType}; /// # } /// ``` /// -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum LogicalPlan { /// Evaluates an arbitrary list of expressions (essentially a /// SELECT with an expression list) on its input. 
@@ -1882,7 +1882,7 @@ impl LogicalPlan { } } -impl Debug for LogicalPlan { +impl Display for LogicalPlan { fn fmt(&self, f: &mut Formatter) -> fmt::Result { self.display_indent().fmt(f) } @@ -2291,6 +2291,19 @@ pub struct TableScan { pub fetch: Option, } +impl Debug for TableScan { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.debug_struct("TableScan") + .field("table_name", &self.table_name) + .field("source", &"...") + .field("projection", &self.projection) + .field("projected_schema", &self.projected_schema) + .field("filters", &self.filters) + .field("fetch", &self.fetch) + .finish_non_exhaustive() + } +} + impl PartialEq for TableScan { fn eq(&self, other: &Self) -> bool { self.table_name == other.table_name diff --git a/datafusion/expr/src/logical_plan/statement.rs b/datafusion/expr/src/logical_plan/statement.rs index f294e7d3ea4cd..21ff8dbd8eece 100644 --- a/datafusion/expr/src/logical_plan/statement.rs +++ b/datafusion/expr/src/logical_plan/statement.rs @@ -25,7 +25,7 @@ use std::fmt::{self, Display}; /// While DataFusion does not offer support transactions, it provides /// [`LogicalPlan`](crate::LogicalPlan) support to assist building /// database systems using DataFusion -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Statement { // Begin a transaction TransactionStart(TransactionStart), @@ -115,7 +115,7 @@ pub enum TransactionIsolationLevel { } /// Indicator that the following statements should be committed or rolled back atomically -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TransactionStart { /// indicates if transaction is allowed to write pub access_mode: TransactionAccessMode, @@ -126,7 +126,7 @@ pub struct TransactionStart { } /// Indicator that any current transaction should be terminated -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TransactionEnd { /// whether the transaction committed or aborted pub conclusion: TransactionConclusion, @@ -138,7 +138,7 @@ pub struct TransactionEnd { /// Set a Variable's value -- value in /// [`ConfigOptions`](datafusion_common::config::ConfigOptions) -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SetVariable { /// The variable name pub variable: String, diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 70ca6f5304ad5..9cd9e4dece26a 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1189,7 +1189,7 @@ mod test { plan: LogicalPlan, config: Option<&dyn OptimizerConfig>, ) { - assert_eq!(expected, format!("{plan:?}"), "Unexpected starting plan"); + assert_eq!(expected, format!("{plan}"), "Unexpected starting plan"); let optimizer = CommonSubexprEliminate::new(); let default_config = OptimizerContext::new(); let config = config.unwrap_or(&default_config); @@ -1198,7 +1198,7 @@ mod test { let optimized_plan = optimized_plan.data; assert_eq!( expected, - format!("{optimized_plan:?}"), + format!("{optimized_plan}"), "Unexpected optimized plan" ); } @@ -1214,7 +1214,7 @@ mod test { let optimized_plan = optimizer.rewrite(plan, config).unwrap(); assert!(optimized_plan.transformed, "failed to optimize plan"); let optimized_plan = optimized_plan.data; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(expected, formatted_plan); } 
diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index 165834e759752..a42fe6a6f95b7 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -105,7 +105,7 @@ mod tests { let optimized_plan = optimizer.optimize(plan, &OptimizerContext::new(), observe)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); Ok(()) } @@ -123,7 +123,7 @@ mod tests { let optimized_plan = optimizer .optimize(plan, &config, observe) .expect("failed to optimize plan"); - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); Ok(()) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 31d59da13323f..a307d0ae0a0b3 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -1529,7 +1529,7 @@ mod tests { \n TableScan: test2 projection=[c1]"; let optimized_plan = optimize(plan)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); // make sure schema for join node include both join columns @@ -1581,7 +1581,7 @@ mod tests { \n TableScan: test2 projection=[c1]"; let optimized_plan = optimize(plan)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); // make sure schema for join node include both join columns @@ -1631,7 +1631,7 @@ mod tests { \n TableScan: test2 projection=[a]"; let optimized_plan = optimize(plan)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); // make sure schema for join node include both join columns diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index f9c9ec961c8e1..4254d3464662b 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1240,7 +1240,7 @@ mod tests { let optimized_plan = optimizer.optimize(plan, &OptimizerContext::new(), observe)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(expected, formatted_plan); Ok(()) } @@ -1401,7 +1401,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "\ Filter: b = Int64(1)\ \n Projection: test.a * Int32(2) + test.c AS b, test.c\ @@ -1431,7 +1431,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "\ Filter: a = Int64(1)\ \n Projection: b * Int32(3) AS a, test.c\ @@ -1581,7 +1581,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "\ Filter: sum(test.c) > Int64(10)\ \n Filter: b > Int64(10)\ @@ -1616,7 +1616,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "\ Filter: sum(test.c) > Int64(10) AND b > Int64(10) AND sum(test.c) < Int64(20)\ \n Aggregate: groupBy=[[b]], aggr=[[sum(test.c)]]\ @@ -1765,7 +1765,7 @@ mod tests { // not part of the test assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.a >= 
Int64(1)\ \n Projection: test.a\ \n Limit: skip=0, fetch=1\ @@ -1797,7 +1797,7 @@ mod tests { // not part of the test assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Projection: test.a\ \n Filter: test.a >= Int64(1)\ \n Filter: test.a <= Int64(1)\ @@ -1831,7 +1831,7 @@ mod tests { \n TableScan: test"; // not part of the test - assert_eq!(format!("{plan:?}"), expected); + assert_eq!(format!("{plan}"), expected); let expected = "\ TestUserDefined\ @@ -1861,7 +1861,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.a <= Int64(1)\ \n Inner Join: test.a = test2.a\ \n TableScan: test\ @@ -1898,7 +1898,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.a <= Int64(1)\ \n Inner Join: Using test.a = test2.a\ \n TableScan: test\ @@ -1938,7 +1938,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.c <= test2.b\ \n Inner Join: test.a = test2.a\ \n Projection: test.a, test.c\ @@ -1981,7 +1981,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.b <= Int64(1)\ \n Inner Join: test.a = test2.a\ \n Projection: test.a, test.b\ @@ -2020,7 +2020,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test2.a <= Int64(1)\ \n Left Join: Using test.a = test2.a\ \n TableScan: test\ @@ -2058,7 +2058,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.a <= Int64(1)\ \n Right Join: Using test.a = test2.a\ \n TableScan: test\ @@ -2097,7 +2097,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test.a <= Int64(1)\ \n Left Join: Using test.a = test2.a\ \n TableScan: test\ @@ -2135,7 +2135,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: test2.a <= Int64(1)\ \n Right Join: Using test.a = test2.a\ \n TableScan: test\ @@ -2178,7 +2178,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Inner Join: test.a = test2.a Filter: test.c > UInt32(1) AND test.b < test2.b AND test2.c > UInt32(4)\ \n Projection: test.a, test.b, test.c\ \n TableScan: test\ @@ -2220,7 +2220,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Inner Join: test.a = test2.a Filter: test.b > UInt32(1) AND test2.c > UInt32(4)\ \n Projection: test.a, test.b, test.c\ \n TableScan: test\ @@ -2260,7 +2260,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Inner Join: test.a = test2.b Filter: test.a > UInt32(1)\ \n Projection: test.a\ \n TableScan: test\ @@ -2303,7 +2303,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Left Join: test.a = test2.a Filter: test.a > UInt32(1) AND test.b < test2.b AND test2.c > UInt32(4)\ \n Projection: test.a, test.b, test.c\ \n TableScan: test\ @@ -2346,7 +2346,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Right Join: test.a = test2.a Filter: test.a > UInt32(1) AND test.b < test2.b AND 
test2.c > UInt32(4)\ \n Projection: test.a, test.b, test.c\ \n TableScan: test\ @@ -2389,7 +2389,7 @@ mod tests { // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Full Join: test.a = test2.a Filter: test.a > UInt32(1) AND test.b < test2.b AND test2.c > UInt32(4)\ \n Projection: test.a, test.b, test.c\ \n TableScan: test\ @@ -2397,7 +2397,7 @@ mod tests { \n TableScan: test2" ); - let expected = &format!("{plan:?}"); + let expected = &format!("{plan}"); assert_optimized_plan_eq(plan, expected) } @@ -2574,7 +2574,7 @@ Projection: a, b // filter on col b assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: b > Int64(10) AND test.c > Int64(10)\ \n Projection: test.a AS b, test.c\ \n TableScan: test" @@ -2603,7 +2603,7 @@ Projection: a, b // filter on col b assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: b > Int64(10) AND test.c > Int64(10)\ \n Projection: b, test.c\ \n Projection: test.a AS b, test.c\ @@ -2631,7 +2631,7 @@ Projection: a, b // filter on col b and d assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: b > Int64(10) AND d > Int64(10)\ \n Projection: test.a AS b, test.c AS d\ \n TableScan: test\ @@ -2668,7 +2668,7 @@ Projection: a, b .build()?; assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Inner Join: c = d Filter: c > UInt32(1)\ \n Projection: test.a AS c\ \n TableScan: test\ @@ -2700,7 +2700,7 @@ Projection: a, b // filter on col b assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: b IN ([UInt32(1), UInt32(2), UInt32(3), UInt32(4)])\ \n Projection: test.a AS b, test.c\ \n TableScan: test\ @@ -2730,7 +2730,7 @@ Projection: a, b // filter on col b assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "Filter: b IN ([UInt32(1), UInt32(2), UInt32(3), UInt32(4)])\ \n Projection: b, test.c\ \n Projection: test.a AS b, test.c\ @@ -2771,7 +2771,7 @@ Projection: a, b \n TableScan: sq\ \n Projection: test.a AS b, test.c\ \n TableScan: test"; - assert_eq!(format!("{plan:?}"), expected_before); + assert_eq!(format!("{plan}"), expected_before); // rewrite filter col b to test.a let expected_after = "\ @@ -2802,7 +2802,7 @@ Projection: a, b \n SubqueryAlias: b\ \n Projection: Int64(0) AS a\ \n EmptyRelation"; - assert_eq!(format!("{plan:?}"), expected_before); + assert_eq!(format!("{plan}"), expected_before); // Ensure that the predicate without any columns (0 = 1) is // still there. 
@@ -2877,7 +2877,7 @@ Projection: a, b // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "LeftSemi Join: test1.a = test2.a Filter: test1.b > UInt32(1) AND test2.b > UInt32(2)\ \n TableScan: test1\ \n Projection: test2.a, test2.b\ @@ -2918,7 +2918,7 @@ Projection: a, b // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "RightSemi Join: test1.a = test2.a Filter: test1.b > UInt32(1) AND test2.b > UInt32(2)\ \n TableScan: test1\ \n Projection: test2.a, test2.b\ @@ -2962,7 +2962,7 @@ Projection: a, b // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "LeftAnti Join: test1.a = test2.a Filter: test1.b > UInt32(1) AND test2.b > UInt32(2)\ \n Projection: test1.a, test1.b\ \n TableScan: test1\ @@ -3008,7 +3008,7 @@ Projection: a, b // not part of the test, just good to know: assert_eq!( - format!("{plan:?}"), + format!("{plan}"), "RightAnti Join: test1.a = test2.a Filter: test1.b > UInt32(1) AND test2.b > UInt32(2)\ \n Projection: test1.a, test1.b\ \n TableScan: test1\ @@ -3074,7 +3074,7 @@ Projection: a, b \n Projection: test1.a, sum(test1.b), TestScalarUDF() + Int32(1) AS r\ \n Aggregate: groupBy=[[test1.a]], aggr=[[sum(test1.b)]]\ \n TableScan: test1"; - assert_eq!(format!("{plan:?}"), expected_before); + assert_eq!(format!("{plan}"), expected_before); let expected_after = "Projection: t.a, t.r\ \n SubqueryAlias: t\ @@ -3119,7 +3119,7 @@ Projection: a, b \n Inner Join: test1.a = test2.a\ \n TableScan: test1\ \n TableScan: test2"; - assert_eq!(format!("{plan:?}"), expected_before); + assert_eq!(format!("{plan}"), expected_before); let expected = "Projection: t.a, t.r\ \n SubqueryAlias: t\ diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index e44f60d1df220..cb9ec3024d939 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -187,7 +187,7 @@ mod tests { let optimizer = Optimizer::with_rules(vec![Arc::new(SimplifyExpressions::new())]); let optimized_plan = optimizer.optimize(plan, &OptimizerContext::new(), observe)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); Ok(()) } @@ -437,7 +437,7 @@ mod tests { let rule = SimplifyExpressions::new(); let optimized_plan = rule.rewrite(plan, &config).unwrap().data; - format!("{optimized_plan:?}") + format!("{optimized_plan}") } #[test] diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index 4dccb42941dd0..1266b548ab057 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -116,7 +116,7 @@ pub fn assert_analyzed_plan_eq( let options = ConfigOptions::default(); let analyzed_plan = Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; - let formatted_plan = format!("{analyzed_plan:?}"); + let formatted_plan = format!("{analyzed_plan}"); assert_eq!(formatted_plan, expected); Ok(()) @@ -130,7 +130,7 @@ pub fn assert_analyzed_plan_ne( let options = ConfigOptions::default(); let analyzed_plan = Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; - let formatted_plan = format!("{analyzed_plan:?}"); + let formatted_plan = format!("{analyzed_plan}"); assert_ne!(formatted_plan, expected); Ok(()) @@ -178,7 +178,7 
@@ pub fn assert_optimized_plan_eq( let optimizer = Optimizer::with_rules(vec![Arc::clone(&rule)]); let optimized_plan = optimizer.optimize(plan, &opt_context, observe)?; - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); assert_eq!(formatted_plan, expected); Ok(()) @@ -205,7 +205,7 @@ pub fn assert_optimized_plan_with_rules( eq: bool, ) -> Result<()> { let optimized_plan = generate_optimized_plan_with_rules(rules, plan); - let formatted_plan = format!("{optimized_plan:?}"); + let formatted_plan = format!("{optimized_plan}"); if eq { assert_eq!(formatted_plan, expected); } else { diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 3c77ffaa17f6c..aaa5eec3955c7 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -50,13 +50,13 @@ fn case_when() -> Result<()> { let expected = "Projection: CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); let sql = "SELECT CASE WHEN col_uint32 > 0 THEN 1 ELSE 0 END FROM test"; let plan = test_sql(sql)?; let expected = "Projection: CASE WHEN test.col_uint32 > UInt32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN test.col_uint32 > Int64(0) THEN Int64(1) ELSE Int64(0) END\ \n TableScan: test projection=[col_uint32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -78,7 +78,7 @@ fn subquery_filter_with_cast() -> Result<()> { \n Projection: test.col_int32\ \n Filter: test.col_utf8 >= Utf8(\"2002-05-08\") AND test.col_utf8 <= Utf8(\"2002-05-13\")\ \n TableScan: test projection=[col_int32, col_utf8]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -89,7 +89,7 @@ fn case_when_aggregate() -> Result<()> { let expected = "Projection: test.col_utf8, sum(CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END) AS n\ \n Aggregate: groupBy=[[test.col_utf8]], aggr=[[sum(CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END) AS sum(CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END)]]\ \n TableScan: test projection=[col_int32, col_utf8]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -100,7 +100,7 @@ fn unsigned_target_type() -> Result<()> { let expected = "Projection: test.col_utf8\ \n Filter: test.col_uint32 > UInt32(0)\ \n TableScan: test projection=[col_uint32, col_utf8]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -111,7 +111,7 @@ fn distribute_by() -> Result<()> { let plan = test_sql(sql)?; let expected = "Repartition: DistributeBy(test.col_utf8)\ \n TableScan: test projection=[col_int32, col_utf8]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -128,7 +128,7 @@ fn semi_join_with_join_filter() -> Result<()> { \n SubqueryAlias: __correlated_sq_1\ \n SubqueryAlias: t2\ \n TableScan: test projection=[col_int32, col_uint32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -145,7 +145,7 @@ fn anti_join_with_join_filter() -> Result<()> { \n SubqueryAlias: __correlated_sq_1\ \n SubqueryAlias: t2\ \n 
TableScan: test projection=[col_int32, col_uint32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -160,7 +160,7 @@ fn where_exists_distinct() -> Result<()> { \n Aggregate: groupBy=[[t2.col_int32]], aggr=[[]]\ \n SubqueryAlias: t2\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -178,7 +178,7 @@ fn intersect() -> Result<()> { \n TableScan: test projection=[col_int32, col_utf8]\ \n TableScan: test projection=[col_int32, col_utf8]\ \n TableScan: test projection=[col_int32, col_utf8]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -192,7 +192,7 @@ fn between_date32_plus_interval() -> Result<()> { \n Projection: \ \n Filter: test.col_date32 >= Date32(\"1998-03-18\") AND test.col_date32 <= Date32(\"1998-06-16\")\ \n TableScan: test projection=[col_date32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -206,7 +206,7 @@ fn between_date64_plus_interval() -> Result<()> { \n Projection: \ \n Filter: test.col_date64 >= Date64(\"1998-03-18\") AND test.col_date64 <= Date64(\"1998-06-16\")\ \n TableScan: test projection=[col_date64]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); Ok(()) } @@ -216,7 +216,7 @@ fn propagate_empty_relation() { let plan = test_sql(sql).unwrap(); // when children exist EmptyRelation, it will bottom-up propagate. let expected = "EmptyRelation"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -232,7 +232,7 @@ fn join_keys_in_subquery_alias() { \n Filter: test.col_int32 IS NOT NULL\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -251,7 +251,7 @@ fn join_keys_in_subquery_alias_1() { \n SubqueryAlias: c\ \n Filter: test.col_int32 IS NOT NULL\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -262,7 +262,7 @@ fn push_down_filter_groupby_expr_contains_alias() { \n Aggregate: groupBy=[[test.col_int32 + CAST(test.col_uint32 AS Int32)]], aggr=[[count(Int64(1)) AS count(*)]]\ \n Filter: test.col_int32 + CAST(test.col_uint32 AS Int32) > Int32(3)\ \n TableScan: test projection=[col_int32, col_uint32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -276,7 +276,7 @@ fn test_same_name_but_not_ambiguous() { \n TableScan: test projection=[col_int32]\ \n SubqueryAlias: t2\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -291,7 +291,7 @@ fn eliminate_nested_filters() { Filter: test.col_int32 > Int32(0)\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -306,7 +306,7 @@ fn eliminate_redundant_null_check_on_count() { Projection: test.col_int32, count(*) AS c\ \n Aggregate: groupBy=[[test.col_int32]], aggr=[[count(Int64(1)) AS count(*)]]\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } #[test] @@ -332,7 +332,7 @@ fn test_propagate_empty_relation_inner_join_and_unions() { \n TableScan: test projection=[col_int32]\ \n Filter: 
test.col_int32 < Int32(0)\ \n TableScan: test projection=[col_int32]"; - assert_eq!(expected, format!("{plan:?}")); + assert_eq!(expected, format!("{plan}")); } fn test_sql(sql: &str) -> Result { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index d150c474e88fc..b96398ef217f3 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -308,7 +308,7 @@ async fn roundtrip_logical_plan_aggregation_with_pk() -> Result<()> { let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -334,7 +334,7 @@ async fn roundtrip_logical_plan_aggregation() -> Result<()> { let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -358,7 +358,7 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> Result<()> { let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; let logical_round_trip = logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -430,7 +430,7 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; let logical_round_trip = logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); match logical_round_trip { LogicalPlan::Copy(copy_to) => { @@ -544,7 +544,7 @@ async fn roundtrip_logical_plan_copy_to_json() -> Result<()> { let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; let logical_round_trip = logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); match logical_round_trip { LogicalPlan::Copy(copy_to) => { @@ -614,7 +614,7 @@ async fn roundtrip_logical_plan_copy_to_parquet() -> Result<()> { let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; let logical_round_trip = logical_plan_from_bytes_with_extension_codec(&bytes, &ctx, &codec)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); match logical_round_trip { LogicalPlan::Copy(copy_to) => { @@ -701,7 +701,7 @@ async fn roundtrip_logical_plan_distinct_on() -> Result<()> { let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -727,7 +727,7 @@ async fn roundtrip_single_count_distinct() -> Result<()> { let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), 
format!("{logical_round_trip}")); Ok(()) } @@ -740,7 +740,7 @@ async fn roundtrip_logical_plan_with_extension() -> Result<()> { let plan = ctx.table("t1").await?.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -765,7 +765,7 @@ async fn roundtrip_logical_plan_unnest() -> Result<()> { let plan = ctx.sql(query).await?.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -906,7 +906,7 @@ async fn roundtrip_expr_api() -> Result<()> { let plan = table.select(expr_list)?.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } @@ -926,13 +926,13 @@ async fn roundtrip_logical_plan_with_view_scan() -> Result<()> { let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); // DROP let plan = ctx.sql("DROP VIEW view_t1").await?.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; - assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + assert_eq!(format!("{plan}"), format!("{logical_round_trip}")); Ok(()) } diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index d9ee1b4db8e2d..aa17be6273ae7 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -63,7 +63,7 @@ fn main() { let plan = sql_to_rel.sql_statement_to_plan(statement.clone()).unwrap(); // show the plan - println!("{plan:?}"); + println!("{plan}"); } struct MyContextProvider { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index bae3ec2e2779e..b65f976a22511 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -209,7 +209,7 @@ fn roundtrip_crossjoin() -> Result<()> { \n TableScan: j1\ \n TableScan: j2"; - assert_eq!(format!("{plan_roundtrip:?}"), expected); + assert_eq!(format!("{plan_roundtrip}"), expected); Ok(()) } @@ -420,7 +420,7 @@ fn test_unnest_logical_plan() -> Result<()> { \n Projection: unnest_table.struct_col AS unnest(unnest_table.struct_col), unnest_table.array_col AS unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ \n TableScan: unnest_table"; - assert_eq!(format!("{plan:?}"), expected); + assert_eq!(format!("{plan}"), expected); Ok(()) } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index c1b2246e4980f..e61c29f1c80d2 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -149,7 +149,8 @@ fn parse_ident_normalization() { }, ); if plan.is_ok() { - assert_eq!(expected, format!("{plan:?}")); + let plan = plan.unwrap(); + assert_eq!(expected, format!("Ok({plan})")); } else { assert_eq!(expected, 
plan.unwrap_err().strip_backtrace()); } @@ -198,7 +199,7 @@ fn test_parse_options_value_normalization() { }, ); if let Ok(plan) = plan { - assert_eq!(expected_plan, format!("{plan:?}")); + assert_eq!(expected_plan, format!("{plan}")); match plan { LogicalPlan::Ddl(DdlStatement::CreateExternalTable( @@ -2827,7 +2828,7 @@ fn quick_test(sql: &str, expected: &str) { fn quick_test_with_options(sql: &str, expected: &str, options: ParserOptions) { let plan = logical_plan_with_options(sql, options).unwrap(); - assert_eq!(format!("{plan:?}"), expected); + assert_eq!(format!("{plan}"), expected); } fn prepare_stmt_quick_test( @@ -2839,7 +2840,7 @@ fn prepare_stmt_quick_test( let assert_plan = plan.clone(); // verify plan - assert_eq!(format!("{assert_plan:?}"), expected_plan); + assert_eq!(format!("{assert_plan}"), expected_plan); // verify data types if let LogicalPlan::Prepare(Prepare { data_types, .. }) = assert_plan { @@ -2857,7 +2858,7 @@ fn prepare_stmt_replace_params_quick_test( ) -> LogicalPlan { // replace params let plan = plan.with_param_values(param_values).unwrap(); - assert_eq!(format!("{plan:?}"), expected_plan); + assert_eq!(format!("{plan}"), expected_plan); plan } diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index bd6e0e00491a1..a782af8eb2479 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -558,7 +558,7 @@ pub fn to_substrait_rel( rel_type: Some(rel_type), })) } - _ => not_impl_err!("Unsupported operator: {plan:?}"), + _ => not_impl_err!("Unsupported operator: {plan}"), } } diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 8fbcd721166e3..fc5f82127d052 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -55,7 +55,7 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!( plan_str, "Projection: FILENAME_PLACEHOLDER_0.l_returnflag AS L_RETURNFLAG, FILENAME_PLACEHOLDER_0.l_linestatus AS L_LINESTATUS, sum(FILENAME_PLACEHOLDER_0.l_quantity) AS SUM_QTY, sum(FILENAME_PLACEHOLDER_0.l_extendedprice) AS SUM_BASE_PRICE, sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS SUM_DISC_PRICE, sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount * Int32(1) + FILENAME_PLACEHOLDER_0.l_tax) AS SUM_CHARGE, avg(FILENAME_PLACEHOLDER_0.l_quantity) AS AVG_QTY, avg(FILENAME_PLACEHOLDER_0.l_extendedprice) AS AVG_PRICE, avg(FILENAME_PLACEHOLDER_0.l_discount) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER\ @@ -89,7 +89,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!( plan_str, "Projection: FILENAME_PLACEHOLDER_1.s_acctbal AS S_ACCTBAL, FILENAME_PLACEHOLDER_1.s_name AS S_NAME, FILENAME_PLACEHOLDER_3.n_name AS N_NAME, FILENAME_PLACEHOLDER_0.p_partkey AS P_PARTKEY, FILENAME_PLACEHOLDER_0.p_mfgr AS P_MFGR, FILENAME_PLACEHOLDER_1.s_address AS S_ADDRESS, FILENAME_PLACEHOLDER_1.s_phone AS S_PHONE, FILENAME_PLACEHOLDER_1.s_comment AS S_COMMENT\ @@ -136,7 +136,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", 
plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_2.l_orderkey AS L_ORDERKEY, sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) AS REVENUE, FILENAME_PLACEHOLDER_1.o_orderdate AS O_ORDERDATE, FILENAME_PLACEHOLDER_1.o_shippriority AS O_SHIPPRIORITY\ \n Limit: skip=0, fetch=10\ \n Sort: sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) DESC NULLS FIRST, FILENAME_PLACEHOLDER_1.o_orderdate ASC NULLS LAST\ @@ -164,7 +164,7 @@ mod tests { )) .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.o_orderpriority AS O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT\ \n Sort: FILENAME_PLACEHOLDER_0.o_orderpriority ASC NULLS LAST\ \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_0.o_orderpriority]], aggr=[[count(Int64(1))]]\ @@ -195,7 +195,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: NATION.n_name AS N_NAME, sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) AS REVENUE\ \n Sort: sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) DESC NULLS FIRST\ \n Aggregate: groupBy=[[NATION.n_name]], aggr=[[sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount)]]\ @@ -229,7 +229,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_0.l_extendedprice * FILENAME_PLACEHOLDER_0.l_discount) AS REVENUE]]\ \n Projection: FILENAME_PLACEHOLDER_0.l_extendedprice * FILENAME_PLACEHOLDER_0.l_discount\ \n Filter: FILENAME_PLACEHOLDER_0.l_shipdate >= CAST(Utf8(\"1994-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_0.l_shipdate < CAST(Utf8(\"1995-01-01\") AS Date32) AND FILENAME_PLACEHOLDER_0.l_discount >= Decimal128(Some(5),3,2) AND FILENAME_PLACEHOLDER_0.l_discount <= Decimal128(Some(7),3,2) AND FILENAME_PLACEHOLDER_0.l_quantity < CAST(Int32(24) AS Decimal128(19, 0))\ @@ -254,7 +254,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.c_custkey AS C_CUSTKEY, FILENAME_PLACEHOLDER_0.c_name AS C_NAME, sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) AS REVENUE, FILENAME_PLACEHOLDER_0.c_acctbal AS C_ACCTBAL, FILENAME_PLACEHOLDER_3.n_name AS N_NAME, FILENAME_PLACEHOLDER_0.c_address AS C_ADDRESS, FILENAME_PLACEHOLDER_0.c_phone AS C_PHONE, FILENAME_PLACEHOLDER_0.c_comment AS C_COMMENT\ \n Limit: skip=0, fetch=20\ \n Sort: sum(FILENAME_PLACEHOLDER_2.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_2.l_discount) DESC NULLS FIRST\ @@ -289,7 +289,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.ps_partkey AS PS_PARTKEY, sum(FILENAME_PLACEHOLDER_0.ps_supplycost * 
FILENAME_PLACEHOLDER_0.ps_availqty) AS value\ \n Sort: sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty) DESC NULLS FIRST\ \n Filter: sum(FILENAME_PLACEHOLDER_0.ps_supplycost * FILENAME_PLACEHOLDER_0.ps_availqty) > ()\ @@ -329,7 +329,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: count(FILENAME_PLACEHOLDER_1.o_orderkey) AS C_COUNT, count(Int64(1)) AS CUSTDIST\ \n Sort: count(Int64(1)) DESC NULLS FIRST, count(FILENAME_PLACEHOLDER_1.o_orderkey) DESC NULLS FIRST\ \n Projection: count(FILENAME_PLACEHOLDER_1.o_orderkey), count(Int64(1))\ @@ -357,7 +357,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ \n Projection: CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount) ELSE Decimal128(Some(0),19,0) END, FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ @@ -383,7 +383,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_1.p_brand AS P_BRAND, FILENAME_PLACEHOLDER_1.p_type AS P_TYPE, FILENAME_PLACEHOLDER_1.p_size AS P_SIZE, count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey) AS SUPPLIER_CNT\ \n Sort: count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey) DESC NULLS FIRST, FILENAME_PLACEHOLDER_1.p_brand ASC NULLS LAST, FILENAME_PLACEHOLDER_1.p_type ASC NULLS LAST, FILENAME_PLACEHOLDER_1.p_size ASC NULLS LAST\ \n Aggregate: groupBy=[[FILENAME_PLACEHOLDER_1.p_brand, FILENAME_PLACEHOLDER_1.p_type, FILENAME_PLACEHOLDER_1.p_size]], aggr=[[count(DISTINCT FILENAME_PLACEHOLDER_0.ps_suppkey)]]\ @@ -434,7 +434,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.c_name AS C_NAME, FILENAME_PLACEHOLDER_0.c_custkey AS C_CUSTKEY, FILENAME_PLACEHOLDER_1.o_orderkey AS O_ORDERKEY, FILENAME_PLACEHOLDER_1.o_orderdate AS O_ORDERDATE, FILENAME_PLACEHOLDER_1.o_totalprice AS O_TOTALPRICE, sum(FILENAME_PLACEHOLDER_2.l_quantity) AS EXPR$5\ \n Limit: skip=0, fetch=100\ \n Sort: FILENAME_PLACEHOLDER_1.o_totalprice DESC NULLS FIRST, FILENAME_PLACEHOLDER_1.o_orderdate ASC NULLS LAST\ @@ -468,7 +468,7 @@ mod tests { .expect("failed to parse 
json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Aggregate: groupBy=[[]], aggr=[[sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS REVENUE]]\n Projection: FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ \n Filter: FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#12\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM CASE\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM PACK\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"SM PKG\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(1) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(1) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(5) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8) OR FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#23\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED BAG\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED PKG\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"MED PACK\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(10) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(10) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8) OR FILENAME_PLACEHOLDER_1.p_partkey = FILENAME_PLACEHOLDER_0.l_partkey AND FILENAME_PLACEHOLDER_1.p_brand = CAST(Utf8(\"Brand#34\") AS Utf8) AND (FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG CASE\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG BOX\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG PACK\") OR FILENAME_PLACEHOLDER_1.p_container = Utf8(\"LG PKG\")) AND FILENAME_PLACEHOLDER_0.l_quantity >= CAST(Int32(20) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_0.l_quantity <= CAST(Int32(20) + Int32(10) AS Decimal128(19, 0)) AND FILENAME_PLACEHOLDER_1.p_size >= Int32(1) AND FILENAME_PLACEHOLDER_1.p_size <= Int32(15) AND (FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR\") OR FILENAME_PLACEHOLDER_0.l_shipmode = Utf8(\"AIR REG\")) AND FILENAME_PLACEHOLDER_0.l_shipinstruct = CAST(Utf8(\"DELIVER IN PERSON\") AS Utf8)\ \n Inner Join: Filter: Boolean(true)\ @@ -494,7 +494,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.s_name AS S_NAME, FILENAME_PLACEHOLDER_0.s_address AS S_ADDRESS\ \n Sort: FILENAME_PLACEHOLDER_0.s_name ASC NULLS LAST\ \n Projection: FILENAME_PLACEHOLDER_0.s_name, FILENAME_PLACEHOLDER_0.s_address\ @@ -537,7 +537,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let 
plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: FILENAME_PLACEHOLDER_0.s_name AS S_NAME, count(Int64(1)) AS NUMWAIT\ \n Limit: skip=0, fetch=100\ \n Sort: count(Int64(1)) DESC NULLS FIRST, FILENAME_PLACEHOLDER_0.s_name ASC NULLS LAST\ @@ -574,7 +574,7 @@ mod tests { .expect("failed to parse json"); let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!(plan_str, "Projection: substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(FILENAME_PLACEHOLDER_0.c_acctbal) AS TOTACCTBAL\n Sort: substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2)) ASC NULLS LAST\ \n Aggregate: groupBy=[[substr(FILENAME_PLACEHOLDER_0.c_phone,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(FILENAME_PLACEHOLDER_0.c_acctbal)]]\ \n Projection: substr(FILENAME_PLACEHOLDER_0.c_phone, Int32(1), Int32(2)), FILENAME_PLACEHOLDER_0.c_acctbal\ diff --git a/datafusion/substrait/tests/cases/function_test.rs b/datafusion/substrait/tests/cases/function_test.rs index b4c5659a3a49e..610caf3a81dfc 100644 --- a/datafusion/substrait/tests/cases/function_test.rs +++ b/datafusion/substrait/tests/cases/function_test.rs @@ -38,7 +38,7 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str = format!("{:?}", plan); + let plan_str = format!("{}", plan); assert_eq!( plan_str, diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs index 6492febc938ec..f6a2b5036c802 100644 --- a/datafusion/substrait/tests/cases/logical_plans.rs +++ b/datafusion/substrait/tests/cases/logical_plans.rs @@ -44,7 +44,7 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; assert_eq!( - format!("{:?}", plan), + format!("{}", plan), "Projection: NOT DATA.a AS EXPR$0\ \n TableScan: DATA projection=[a, b, c, d, e, f]" ); @@ -70,7 +70,7 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; assert_eq!( - format!("{:?}", plan), + format!("{}", plan), "Projection: sum(DATA.a) PARTITION BY [DATA.b] ORDER BY [DATA.c ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\ \n WindowAggr: windowExpr=[[sum(DATA.a) PARTITION BY [DATA.b] ORDER BY [DATA.c ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: DATA projection=[a, b, c, d, e, f]" @@ -89,7 +89,7 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; - assert_eq!(format!("{:?}", &plan), "Values: (List([1, 2]))"); + assert_eq!(format!("{}", &plan), "Values: (List([1, 2]))"); // Need to trigger execution to ensure that Arrow has validated the plan DataFrame::new(ctx.state(), plan).show().await?; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 439e3efa29228..083a589fce267 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -841,8 +841,8 @@ async fn extension_logical_plan() -> Result<()> { let proto = to_substrait_plan(&ext_plan, &ctx)?; let plan2 = from_substrait_plan(&ctx, &proto).await?; - let plan1str = format!("{ext_plan:?}"); - let plan2str = format!("{plan2:?}"); + let plan1str = format!("{ext_plan}"); + let plan2str = format!("{plan2}"); assert_eq!(plan1str, plan2str); Ok(()) @@ -943,7 +943,7 @@ async fn roundtrip_repartition_roundrobin() -> Result<()> { let plan2 = 
from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - assert_eq!(format!("{plan:?}"), format!("{plan2:?}")); + assert_eq!(format!("{plan}"), format!("{plan2}")); Ok(()) } @@ -960,7 +960,7 @@ async fn roundtrip_repartition_hash() -> Result<()> { let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - assert_eq!(format!("{plan:?}"), format!("{plan2:?}")); + assert_eq!(format!("{plan}"), format!("{plan2}")); Ok(()) } @@ -1061,8 +1061,8 @@ async fn assert_expected_plan( let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - println!("{plan:#?}"); - println!("{plan2:#?}"); + println!("{plan}"); + println!("{plan2}"); println!("{proto:?}"); @@ -1070,7 +1070,7 @@ async fn assert_expected_plan( assert_eq!(plan.schema(), plan2.schema()); } - let plan2str = format!("{plan2:?}"); + let plan2str = format!("{plan2}"); assert_eq!(expected_plan_str, &plan2str); Ok(()) @@ -1085,8 +1085,8 @@ async fn roundtrip_fill_na(sql: &str) -> Result<()> { let plan2 = ctx.state().optimize(&plan2)?; // Format plan string and replace all None's with 0 - let plan1str = format!("{plan:?}").replace("None", "0"); - let plan2str = format!("{plan2:?}").replace("None", "0"); + let plan1str = format!("{plan}").replace("None", "0"); + let plan2str = format!("{plan2}").replace("None", "0"); assert_eq!(plan1str, plan2str); @@ -1108,11 +1108,11 @@ async fn test_alias(sql_with_alias: &str, sql_no_alias: &str) -> Result<()> { let proto = to_substrait_plan(&df.into_optimized_plan()?, &ctx)?; let plan = from_substrait_plan(&ctx, &proto).await?; - println!("{plan_with_alias:#?}"); - println!("{plan:#?}"); + println!("{plan_with_alias}"); + println!("{plan}"); - let plan1str = format!("{plan_with_alias:?}"); - let plan2str = format!("{plan:?}"); + let plan1str = format!("{plan_with_alias}"); + let plan2str = format!("{plan}"); assert_eq!(plan1str, plan2str); assert_eq!(plan_with_alias.schema(), plan.schema()); @@ -1126,13 +1126,13 @@ async fn roundtrip_with_ctx(sql: &str, ctx: SessionContext) -> Result> let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - println!("{plan:#?}"); - println!("{plan2:#?}"); + println!("{plan}"); + println!("{plan2}"); println!("{proto:?}"); - let plan1str = format!("{plan:?}"); - let plan2str = format!("{plan2:?}"); + let plan1str = format!("{plan}"); + let plan2str = format!("{plan2}"); assert_eq!(plan1str, plan2str); assert_eq!(plan.schema(), plan2.schema()); diff --git a/datafusion/substrait/tests/cases/serialize.rs b/datafusion/substrait/tests/cases/serialize.rs index f6736ca222790..d792ac33c3339 100644 --- a/datafusion/substrait/tests/cases/serialize.rs +++ b/datafusion/substrait/tests/cases/serialize.rs @@ -43,8 +43,8 @@ mod tests { let proto = serializer::deserialize(path).await?; // Check plan equality let plan = from_substrait_plan(&ctx, &proto).await?; - let plan_str_ref = format!("{plan_ref:?}"); - let plan_str = format!("{plan:?}"); + let plan_str_ref = format!("{plan_ref}"); + let plan_str = format!("{plan}"); assert_eq!(plan_str_ref, plan_str); // Delete test binary file fs::remove_file(path)?; From c8e5996c4751e8231334c7af8a614af5e4a24bde Mon Sep 17 00:00:00 2001 From: mertak-synnada Date: Sun, 4 Aug 2024 17:18:53 +0300 Subject: [PATCH 210/357] Remove redundant Aggregate when `DISTINCT` & `GROUP BY` are in the same query (#11781) * Delete docs.yaml * initialize eliminate_aggregate.rs rule * remove redundant prints * Add multiple 
group by expression handling. * rename eliminate_aggregate.rs as eliminate_distinct.rs implement as rewrite function * remove logic for distinct on since group by statement must exist in projection * format code * add eliminate_distinct rule to tests * simplify function add additional tests for not removing cases * fix child issue * format * fix docs * remove eliminate_distinct rule and make it a part of replace_distinct_aggregate * Update datafusion/common/src/functional_dependencies.rs Co-authored-by: Mehmet Ozan Kabak * add comment and fix variable call * fix test cases as optimized plan * format code * simplify comments Co-authored-by: Mehmet Ozan Kabak * do not replace redundant distincts with aggregate --------- Co-authored-by: metesynnada <100111937+metesynnada@users.noreply.github.com> Co-authored-by: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Co-authored-by: Mustafa Akur Co-authored-by: Mert Akkaya Co-authored-by: Mehmet Ozan Kabak --- .../common/src/functional_dependencies.rs | 39 ++++---- .../src/replace_distinct_aggregate.rs | 90 +++++++++++++++++++ .../src/single_distinct_to_groupby.rs | 2 +- .../sqllogictest/test_files/aggregate.slt | 18 +--- 4 files changed, 119 insertions(+), 30 deletions(-) diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs index 452f1862b2746..666ea73027b38 100644 --- a/datafusion/common/src/functional_dependencies.rs +++ b/datafusion/common/src/functional_dependencies.rs @@ -524,22 +524,31 @@ pub fn aggregate_functional_dependencies( } } - // If we have a single GROUP BY key, we can guarantee uniqueness after + // When we have a GROUP BY key, we can guarantee uniqueness after // aggregation: - if group_by_expr_names.len() == 1 { - // If `source_indices` contain 0, delete this functional dependency - // as it will be added anyway with mode `Dependency::Single`: - aggregate_func_dependencies.retain(|item| !item.source_indices.contains(&0)); - // Add a new functional dependency associated with the whole table: - aggregate_func_dependencies.push( - // Use nullable property of the group by expression - FunctionalDependence::new( - vec![0], - target_indices, - aggr_fields[0].is_nullable(), - ) - .with_mode(Dependency::Single), - ); + if !group_by_expr_names.is_empty() { + let count = group_by_expr_names.len(); + let source_indices = (0..count).collect::>(); + let nullable = source_indices + .iter() + .any(|idx| aggr_fields[*idx].is_nullable()); + // If GROUP BY expressions do not already act as a determinant: + if !aggregate_func_dependencies.iter().any(|item| { + // If `item.source_indices` is a subset of GROUP BY expressions, we shouldn't add + // them since `item.source_indices` defines this relation already. + + // The following simple comparison is working well because + // GROUP BY expressions come here as a prefix. 
+ item.source_indices.iter().all(|idx| idx < &count) + }) { + // Add a new functional dependency associated with the whole table: + // Use nullable property of the GROUP BY expression: + aggregate_func_dependencies.push( + // Use nullable property of the GROUP BY expression: + FunctionalDependence::new(source_indices, target_indices, nullable) + .with_mode(Dependency::Single), + ); + } } FunctionalDependencies::new(aggregate_func_dependencies) } diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index 430517121f2ac..f73eeacfbf0e1 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -77,6 +77,21 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { match plan { LogicalPlan::Distinct(Distinct::All(input)) => { let group_expr = expand_wildcard(input.schema(), &input, None)?; + + let field_count = input.schema().fields().len(); + for dep in input.schema().functional_dependencies().iter() { + // If distinct is exactly the same with a previous GROUP BY, we can + // simply remove it: + if dep.source_indices[..field_count] + .iter() + .enumerate() + .all(|(idx, f_idx)| idx == *f_idx) + { + return Ok(Transformed::yes(input.as_ref().clone())); + } + } + + // Replace with aggregation: let aggr_plan = LogicalPlan::Aggregate(Aggregate::try_new( input, group_expr, @@ -165,3 +180,78 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { Some(BottomUp) } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::replace_distinct_aggregate::ReplaceDistinctWithAggregate; + use crate::test::*; + + use datafusion_common::Result; + use datafusion_expr::{ + col, logical_plan::builder::LogicalPlanBuilder, Expr, LogicalPlan, + }; + use datafusion_functions_aggregate::sum::sum; + + fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> { + assert_optimized_plan_eq( + Arc::new(ReplaceDistinctWithAggregate::new()), + plan.clone(), + expected, + ) + } + + #[test] + fn eliminate_redundant_distinct_simple() -> Result<()> { + let table_scan = test_table_scan().unwrap(); + let plan = LogicalPlanBuilder::from(table_scan) + .aggregate(vec![col("c")], Vec::::new())? + .project(vec![col("c")])? + .distinct()? + .build()?; + + let expected = "Projection: test.c\n Aggregate: groupBy=[[test.c]], aggr=[[]]\n TableScan: test"; + assert_optimized_plan_equal(&plan, expected) + } + + #[test] + fn eliminate_redundant_distinct_pair() -> Result<()> { + let table_scan = test_table_scan().unwrap(); + let plan = LogicalPlanBuilder::from(table_scan) + .aggregate(vec![col("a"), col("b")], Vec::::new())? + .project(vec![col("a"), col("b")])? + .distinct()? + .build()?; + + let expected = + "Projection: test.a, test.b\n Aggregate: groupBy=[[test.a, test.b]], aggr=[[]]\n TableScan: test"; + assert_optimized_plan_equal(&plan, expected) + } + + #[test] + fn do_not_eliminate_distinct() -> Result<()> { + let table_scan = test_table_scan().unwrap(); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b")])? + .distinct()? 
+ .build()?; + + let expected = "Aggregate: groupBy=[[test.a, test.b]], aggr=[[]]\n Projection: test.a, test.b\n TableScan: test"; + assert_optimized_plan_equal(&plan, expected) + } + + #[test] + fn do_not_eliminate_distinct_with_aggr() -> Result<()> { + let table_scan = test_table_scan().unwrap(); + let plan = LogicalPlanBuilder::from(table_scan) + .aggregate(vec![col("a"), col("b"), col("c")], vec![sum(col("c"))])? + .project(vec![col("a"), col("b")])? + .distinct()? + .build()?; + + let expected = + "Aggregate: groupBy=[[test.a, test.b]], aggr=[[]]\n Projection: test.a, test.b\n Aggregate: groupBy=[[test.a, test.b, test.c]], aggr=[[sum(test.c)]]\n TableScan: test"; + assert_optimized_plan_equal(&plan, expected) + } +} diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 69c1b505727d8..fba42d7f880bd 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -39,7 +39,7 @@ use hashbrown::HashSet; /// single distinct to group by optimizer rule /// ```text /// Before: -/// SELECT a, count(DINSTINCT b), sum(c) +/// SELECT a, count(DISTINCT b), sum(c) /// FROM t /// GROUP BY a /// diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index ee72289d66eb1..abeeb767b9488 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4536,19 +4536,14 @@ EXPLAIN SELECT DISTINCT c3 FROM aggregate_test_100 group by c3 limit 5; logical_plan 01)Limit: skip=0, fetch=5 02)--Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[]] -03)----Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[]] -04)------TableScan: aggregate_test_100 projection=[c3] +03)----TableScan: aggregate_test_100 projection=[c3] physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--AggregateExec: mode=Final, gby=[c3@0 as c3], aggr=[], lim=[5] 03)----CoalescePartitionsExec 04)------AggregateExec: mode=Partial, gby=[c3@0 as c3], aggr=[], lim=[5] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -06)----------AggregateExec: mode=Final, gby=[c3@0 as c3], aggr=[], lim=[5] -07)------------CoalescePartitionsExec -08)--------------AggregateExec: mode=Partial, gby=[c3@0 as c3], aggr=[], lim=[5] -09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3], has_header=true +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3], has_header=true query I SELECT DISTINCT c3 FROM aggregate_test_100 group by c3 limit 5; @@ -4699,19 +4694,14 @@ EXPLAIN SELECT DISTINCT c3 FROM aggregate_test_100 group by c3 limit 5; logical_plan 01)Limit: skip=0, fetch=5 02)--Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[]] -03)----Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[]] -04)------TableScan: aggregate_test_100 projection=[c3] +03)----TableScan: aggregate_test_100 projection=[c3] physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--AggregateExec: mode=Final, gby=[c3@0 as c3], aggr=[] 03)----CoalescePartitionsExec 04)------AggregateExec: mode=Partial, gby=[c3@0 as c3], aggr=[] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -06)----------AggregateExec: mode=Final, gby=[c3@0 as c3], aggr=[] 
-07)------------CoalescePartitionsExec -08)--------------AggregateExec: mode=Partial, gby=[c3@0 as c3], aggr=[] -09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3], has_header=true +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3], has_header=true statement ok set datafusion.optimizer.enable_distinct_aggregation_soft_limit = true; From b4069a65a9bb207370d382bdde93f1c98d69b9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 5 Aug 2024 08:15:57 +0800 Subject: [PATCH 211/357] Remove `AggregateFunctionDefinition` (#11803) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove [200~if udf.name() == count => { * Apply review suggestions --- datafusion/core/src/physical_planner.rs | 69 +++++++++---------- datafusion/expr/src/expr.rs | 34 +++------ datafusion/expr/src/expr_schema.rs | 47 ++++++------- datafusion/expr/src/tree_node.rs | 31 ++++----- datafusion/functions-nested/src/planner.rs | 4 +- .../src/analyzer/count_wildcard_rule.rs | 8 +-- .../optimizer/src/analyzer/type_coercion.rs | 42 ++++++----- datafusion/optimizer/src/decorrelate.rs | 20 ++---- .../simplify_expressions/expr_simplifier.rs | 8 +-- .../src/single_distinct_to_groupby.rs | 17 +++-- datafusion/proto/src/logical_plan/to_proto.rs | 50 +++++++------- datafusion/sql/src/unparser/expr.rs | 2 +- .../substrait/src/logical_plan/producer.rs | 11 +-- 13 files changed, 144 insertions(+), 199 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 03e20b886e2c4..378a892111c59 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -74,8 +74,8 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr, - Cast, GroupingSet, InList, Like, TryCast, WindowFunction, + self, AggregateFunction, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, Like, + TryCast, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::expr_vec_fmt; @@ -223,18 +223,15 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) } Expr::AggregateFunction(AggregateFunction { - func_def, + func, distinct, args, filter: _, order_by, null_treatment: _, - }) => create_function_physical_name( - func_def.name(), - *distinct, - args, - order_by.as_ref(), - ), + }) => { + create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) + } Expr::GroupingSet(grouping_set) => match grouping_set { GroupingSet::Rollup(exprs) => Ok(format!( "ROLLUP ({})", @@ -1817,7 +1814,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( ) -> Result { match e { Expr::AggregateFunction(AggregateFunction { - func_def, + func, distinct, args, filter, @@ -1839,36 +1836,34 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; - let (agg_expr, filter, order_by) = match func_def { - AggregateFunctionDefinition::UDF(fun) => { - let sort_exprs = order_by.clone().unwrap_or(vec![]); - let physical_sort_exprs = match order_by { - 
Some(exprs) => Some(create_physical_sort_exprs( - exprs, - logical_input_schema, - execution_props, - )?), - None => None, - }; + let (agg_expr, filter, order_by) = { + let sort_exprs = order_by.clone().unwrap_or(vec![]); + let physical_sort_exprs = match order_by { + Some(exprs) => Some(create_physical_sort_exprs( + exprs, + logical_input_schema, + execution_props, + )?), + None => None, + }; - let ordering_reqs: Vec = - physical_sort_exprs.clone().unwrap_or(vec![]); + let ordering_reqs: Vec = + physical_sort_exprs.clone().unwrap_or(vec![]); - let agg_expr = udaf::create_aggregate_expr_with_dfschema( - fun, - &physical_args, - args, - &sort_exprs, - &ordering_reqs, - logical_input_schema, - name, - ignore_nulls, - *distinct, - false, - )?; + let agg_expr = udaf::create_aggregate_expr_with_dfschema( + func, + &physical_args, + args, + &sort_exprs, + &ordering_reqs, + logical_input_schema, + name, + ignore_nulls, + *distinct, + false, + )?; - (agg_expr, filter, physical_sort_exprs) - } + (agg_expr, filter, physical_sort_exprs) }; Ok((agg_expr, filter, order_by)) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 708843494814b..1a51c181f49f3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -627,22 +627,6 @@ impl Sort { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -/// Defines which implementation of an aggregate function DataFusion should call. -pub enum AggregateFunctionDefinition { - /// Resolved to a user defined aggregate function - UDF(Arc), -} - -impl AggregateFunctionDefinition { - /// Function's name for display - pub fn name(&self) -> &str { - match self { - AggregateFunctionDefinition::UDF(udf) => udf.name(), - } - } -} - /// Aggregate function /// /// See also [`ExprFunctionExt`] to set these fields on `Expr` @@ -651,7 +635,7 @@ impl AggregateFunctionDefinition { #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct AggregateFunction { /// Name of the function - pub func_def: AggregateFunctionDefinition, + pub func: Arc, /// List of expressions to feed to the functions as arguments pub args: Vec, /// Whether this is a DISTINCT aggregation or not @@ -666,7 +650,7 @@ pub struct AggregateFunction { impl AggregateFunction { /// Create a new AggregateFunction expression with a user-defined function (UDF) pub fn new_udf( - udf: Arc, + func: Arc, args: Vec, distinct: bool, filter: Option>, @@ -674,7 +658,7 @@ impl AggregateFunction { null_treatment: Option, ) -> Self { Self { - func_def: AggregateFunctionDefinition::UDF(udf), + func, args, distinct, filter, @@ -1666,14 +1650,14 @@ impl Expr { func.hash(hasher); } Expr::AggregateFunction(AggregateFunction { - func_def, + func, args: _args, distinct, filter: _filter, order_by: _order_by, null_treatment, }) => { - func_def.hash(hasher); + func.hash(hasher); distinct.hash(hasher); null_treatment.hash(hasher); } @@ -1870,7 +1854,7 @@ impl fmt::Display for Expr { Ok(()) } Expr::AggregateFunction(AggregateFunction { - func_def, + func, distinct, ref args, filter, @@ -1878,7 +1862,7 @@ impl fmt::Display for Expr { null_treatment, .. 
}) => { - fmt_function(f, func_def.name(), *distinct, args, true)?; + fmt_function(f, func.name(), *distinct, args, true)?; if let Some(nt) = null_treatment { write!(f, " {}", nt)?; } @@ -2190,14 +2174,14 @@ fn write_name(w: &mut W, e: &Expr) -> Result<()> { write!(w, "{window_frame}")?; } Expr::AggregateFunction(AggregateFunction { - func_def, + func, distinct, args, filter, order_by, null_treatment, }) => { - write_function_name(w, func_def.name(), *distinct, args)?; + write_function_name(w, func.name(), *distinct, args)?; if let Some(fe) = filter { write!(w, " FILTER (WHERE {fe})")?; }; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 6344b892adb7e..676903d59a075 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,8 +17,8 @@ use super::{Between, Expr, Like}; use crate::expr::{ - AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast, InList, - InSubquery, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction, + AggregateFunction, Alias, BinaryExpr, Cast, InList, InSubquery, Placeholder, + ScalarFunction, Sort, TryCast, Unnest, WindowFunction, }; use crate::type_coercion::binary::get_result_type; use crate::type_coercion::functions::{ @@ -193,28 +193,24 @@ impl ExprSchemable for Expr { _ => fun.return_type(&data_types, &nullability), } } - Expr::AggregateFunction(AggregateFunction { func_def, args, .. }) => { + Expr::AggregateFunction(AggregateFunction { func, args, .. }) => { let data_types = args .iter() .map(|e| e.get_type(schema)) .collect::>>()?; - match func_def { - AggregateFunctionDefinition::UDF(fun) => { - let new_types = data_types_with_aggregate_udf(&data_types, fun) - .map_err(|err| { - plan_datafusion_err!( - "{} {}", - err, - utils::generate_signature_error_msg( - fun.name(), - fun.signature().clone(), - &data_types - ) + let new_types = data_types_with_aggregate_udf(&data_types, func) + .map_err(|err| { + plan_datafusion_err!( + "{} {}", + err, + utils::generate_signature_error_msg( + func.name(), + func.signature().clone(), + &data_types ) - })?; - Ok(fun.return_type(&new_types)?) - } - } + ) + })?; + Ok(func.return_type(&new_types)?) } Expr::Not(_) | Expr::IsNull(_) @@ -329,13 +325,12 @@ impl ExprSchemable for Expr { } } Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), - Expr::AggregateFunction(AggregateFunction { func_def, .. }) => { - match func_def { - // TODO: UDF should be able to customize nullability - AggregateFunctionDefinition::UDF(udf) if udf.name() == "count" => { - Ok(false) - } - AggregateFunctionDefinition::UDF(_) => Ok(true), + Expr::AggregateFunction(AggregateFunction { func, .. }) => { + // TODO: UDF should be able to customize nullability + if func.name() == "count" { + Ok(false) + } else { + Ok(true) } } Expr::ScalarVariable(_, _) diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index a8062c0c07ee9..450ebb6c22752 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -18,9 +18,8 @@ //! 
Tree node implementation for logical expr use crate::expr::{ - AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Case, - Cast, GroupingSet, InList, InSubquery, Like, Placeholder, ScalarFunction, Sort, - TryCast, Unnest, WindowFunction, + AggregateFunction, Alias, Between, BinaryExpr, Case, Cast, GroupingSet, InList, + InSubquery, Like, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction, }; use crate::{Expr, ExprFunctionExt}; @@ -304,7 +303,7 @@ impl TreeNode for Expr { }), Expr::AggregateFunction(AggregateFunction { args, - func_def, + func, distinct, filter, order_by, @@ -316,20 +315,16 @@ impl TreeNode for Expr { order_by, transform_option_vec(order_by, &mut f) )? - .map_data( - |(new_args, new_filter, new_order_by)| match func_def { - AggregateFunctionDefinition::UDF(fun) => { - Ok(Expr::AggregateFunction(AggregateFunction::new_udf( - fun, - new_args, - distinct, - new_filter, - new_order_by, - null_treatment, - ))) - } - }, - )?, + .map_data(|(new_args, new_filter, new_order_by)| { + Ok(Expr::AggregateFunction(AggregateFunction::new_udf( + func, + new_args, + distinct, + new_filter, + new_order_by, + null_treatment, + ))) + })?, Expr::GroupingSet(grouping_set) => match grouping_set { GroupingSet::Rollup(exprs) => transform_vec(exprs, &mut f)? .update_data(|ve| Expr::GroupingSet(GroupingSet::Rollup(ve))), diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index fee3e83a0d656..f980362105a1e 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -20,7 +20,6 @@ use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ - expr::AggregateFunctionDefinition, planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, sqlparser, Expr, ExprSchemable, GetFieldAccess, }; @@ -171,6 +170,5 @@ impl ExprPlanner for FieldAccessPlanner { } fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool { - let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def; - return udf.name() == "array_agg"; + return agg_func.func.name() == "array_agg"; } diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 6f832966671c7..e2da6c66abc4c 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -21,9 +21,7 @@ use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; -use datafusion_expr::expr::{ - AggregateFunction, AggregateFunctionDefinition, WindowFunction, -}; +use datafusion_expr::expr::{AggregateFunction, WindowFunction}; use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{lit, Expr, LogicalPlan, WindowFunctionDefinition}; @@ -56,10 +54,10 @@ fn is_wildcard(expr: &Expr) -> bool { fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool { matches!(aggregate_function, AggregateFunction { - func_def: AggregateFunctionDefinition::UDF(udf), + func, args, .. 
- } if udf.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty())) + } if func.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty())) } fn is_count_star_window_aggregate(window_function: &WindowFunction) -> bool { diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index bcd1cbcce23e3..2823b0fca2d12 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -28,8 +28,8 @@ use datafusion_common::{ DataFusionError, Result, ScalarValue, }; use datafusion_expr::expr::{ - self, AggregateFunctionDefinition, Between, BinaryExpr, Case, Exists, InList, - InSubquery, Like, ScalarFunction, WindowFunction, + self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, + WindowFunction, }; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::tree_node::unwrap_arc; @@ -393,31 +393,29 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { ))) } Expr::AggregateFunction(expr::AggregateFunction { - func_def, + func, args, distinct, filter, order_by, null_treatment, - }) => match func_def { - AggregateFunctionDefinition::UDF(fun) => { - let new_expr = coerce_arguments_for_signature_with_aggregate_udf( - args, - self.schema, - &fun, - )?; - Ok(Transformed::yes(Expr::AggregateFunction( - expr::AggregateFunction::new_udf( - fun, - new_expr, - distinct, - filter, - order_by, - null_treatment, - ), - ))) - } - }, + }) => { + let new_expr = coerce_arguments_for_signature_with_aggregate_udf( + args, + self.schema, + &func, + )?; + Ok(Transformed::yes(Expr::AggregateFunction( + expr::AggregateFunction::new_udf( + func, + new_expr, + distinct, + filter, + order_by, + null_treatment, + ), + ))) + } Expr::WindowFunction(WindowFunction { fun, args, diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 6dbf1641bd7cb..fdd9ef8a8b0b8 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -28,7 +28,7 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue}; -use datafusion_expr::expr::{AggregateFunctionDefinition, Alias}; +use datafusion_expr::expr::Alias; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction}; use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; @@ -433,19 +433,13 @@ fn agg_exprs_evaluation_result_on_empty_batch( .clone() .transform_up(|expr| { let new_expr = match expr { - Expr::AggregateFunction(expr::AggregateFunction { - func_def, .. - }) => match func_def { - AggregateFunctionDefinition::UDF(fun) => { - if fun.name() == "count" { - Transformed::yes(Expr::Literal(ScalarValue::Int64(Some( - 0, - )))) - } else { - Transformed::yes(Expr::Literal(ScalarValue::Null)) - } + Expr::AggregateFunction(expr::AggregateFunction { func, .. 
}) => { + if func.name() == "count" { + Transformed::yes(Expr::Literal(ScalarValue::Int64(Some(0)))) + } else { + Transformed::yes(Expr::Literal(ScalarValue::Null)) } - }, + } _ => Transformed::no(expr), }; Ok(new_expr) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 1e1418744fb8a..979a1499d0dee 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -32,9 +32,7 @@ use datafusion_common::{ tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; -use datafusion_expr::expr::{ - AggregateFunctionDefinition, InList, InSubquery, WindowFunction, -}; +use datafusion_expr::expr::{InList, InSubquery, WindowFunction}; use datafusion_expr::simplify::ExprSimplifyResult; use datafusion_expr::{ and, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, @@ -1408,9 +1406,9 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { } Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction { - func_def: AggregateFunctionDefinition::UDF(ref udaf), + ref func, .. - }) => match (udaf.simplify(), expr) { + }) => match (func.simplify(), expr) { (Some(simplify_function), Expr::AggregateFunction(af)) => { Transformed::yes(simplify_function(af, info)?) } diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index fba42d7f880bd..9a0fab14d3e0e 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -26,7 +26,6 @@ use datafusion_common::{ internal_err, qualified_name, tree_node::Transformed, DataFusionError, Result, }; use datafusion_expr::builder::project; -use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::{ col, expr::AggregateFunction, @@ -70,7 +69,7 @@ fn is_single_distinct_agg(aggr_expr: &[Expr]) -> Result { let mut aggregate_count = 0; for expr in aggr_expr { if let Expr::AggregateFunction(AggregateFunction { - func_def: AggregateFunctionDefinition::UDF(fun), + func, distinct, args, filter, @@ -86,9 +85,9 @@ fn is_single_distinct_agg(aggr_expr: &[Expr]) -> Result { for e in args { fields_set.insert(e); } - } else if fun.name() != "sum" - && fun.name().to_lowercase() != "min" - && fun.name().to_lowercase() != "max" + } else if func.name() != "sum" + && func.name().to_lowercase() != "min" + && func.name().to_lowercase() != "max" { return Ok(false); } @@ -184,7 +183,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { .into_iter() .map(|aggr_expr| match aggr_expr { Expr::AggregateFunction(AggregateFunction { - func_def: AggregateFunctionDefinition::UDF(udf), + func, mut args, distinct, .. 
@@ -200,7 +199,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { .push(arg.alias(SINGLE_DISTINCT_ALIAS)); } Ok(Expr::AggregateFunction(AggregateFunction::new_udf( - udf, + func, vec![col(SINGLE_DISTINCT_ALIAS)], false, // intentional to remove distinct here None, @@ -213,7 +212,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { let alias_str = format!("alias{}", index); inner_aggr_exprs.push( Expr::AggregateFunction(AggregateFunction::new_udf( - Arc::clone(&udf), + Arc::clone(&func), args, false, None, @@ -223,7 +222,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { .alias(&alias_str), ); Ok(Expr::AggregateFunction(AggregateFunction::new_udf( - udf, + func, vec![col(&alias_str)], false, None, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 74d9d61b3a7f3..ab81ce8af9cb9 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -21,8 +21,8 @@ use datafusion_common::{TableReference, UnnestOptions}; use datafusion_expr::expr::{ - self, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast, GroupingSet, - InList, Like, Placeholder, ScalarFunction, Sort, Unnest, + self, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, Like, Placeholder, + ScalarFunction, Sort, Unnest, }; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, BuiltInWindowFunction, Expr, @@ -361,38 +361,34 @@ pub fn serialize_expr( } } Expr::AggregateFunction(expr::AggregateFunction { - ref func_def, + ref func, ref args, ref distinct, ref filter, ref order_by, null_treatment: _, - }) => match func_def { - AggregateFunctionDefinition::UDF(fun) => { - let mut buf = Vec::new(); - let _ = codec.try_encode_udaf(fun, &mut buf); - protobuf::LogicalExprNode { - expr_type: Some(ExprType::AggregateUdfExpr(Box::new( - protobuf::AggregateUdfExprNode { - fun_name: fun.name().to_string(), - args: serialize_exprs(args, codec)?, - distinct: *distinct, - filter: match filter { - Some(e) => { - Some(Box::new(serialize_expr(e.as_ref(), codec)?)) - } - None => None, - }, - order_by: match order_by { - Some(e) => serialize_exprs(e, codec)?, - None => vec![], - }, - fun_definition: (!buf.is_empty()).then_some(buf), + }) => { + let mut buf = Vec::new(); + let _ = codec.try_encode_udaf(func, &mut buf); + protobuf::LogicalExprNode { + expr_type: Some(ExprType::AggregateUdfExpr(Box::new( + protobuf::AggregateUdfExprNode { + fun_name: func.name().to_string(), + args: serialize_exprs(args, codec)?, + distinct: *distinct, + filter: match filter { + Some(e) => Some(Box::new(serialize_expr(e.as_ref(), codec)?)), + None => None, }, - ))), - } + order_by: match order_by { + Some(e) => serialize_exprs(e, codec)?, + None => vec![], + }, + fun_definition: (!buf.is_empty()).then_some(buf), + }, + ))), } - }, + } Expr::ScalarVariable(_, _) => { return Err(Error::General( diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 9b44848a91a88..de130754ab1aa 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -337,7 +337,7 @@ impl Unparser<'_> { escape_char: escape_char.map(|c| c.to_string()), }), Expr::AggregateFunction(agg) => { - let func_name = agg.func_def.name(); + let func_name = agg.func.name(); let args = self.function_args_to_sql(&agg.args)?; let filter = match &agg.filter { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 
a782af8eb2479..ee04749f5e6b4 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -49,8 +49,7 @@ use datafusion::common::{ use datafusion::common::{substrait_err, DFSchemaRef}; #[allow(unused_imports)] use datafusion::logical_expr::expr::{ - AggregateFunctionDefinition, Alias, BinaryExpr, Case, Cast, GroupingSet, InList, - InSubquery, Sort, WindowFunction, + Alias, BinaryExpr, Case, Cast, GroupingSet, InList, InSubquery, Sort, WindowFunction, }; use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Operator}; use datafusion::prelude::Expr; @@ -764,9 +763,7 @@ pub fn to_substrait_agg_measure( extensions: &mut Extensions, ) -> Result { match expr { - Expr::AggregateFunction(expr::AggregateFunction { func_def, args, distinct, filter, order_by, null_treatment: _, }) => { - match func_def { - AggregateFunctionDefinition::UDF(fun) => { + Expr::AggregateFunction(expr::AggregateFunction { func, args, distinct, filter, order_by, null_treatment: _, }) => { let sorts = if let Some(order_by) = order_by { order_by.iter().map(|expr| to_substrait_sort_field(ctx, expr, schema, extensions)).collect::>>()? } else { @@ -776,7 +773,7 @@ pub fn to_substrait_agg_measure( for arg in args { arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extensions)?)) }); } - let function_anchor = extensions.register_function(fun.name().to_string()); + let function_anchor = extensions.register_function(func.name().to_string()); Ok(Measure { measure: Some(AggregateFunction { function_reference: function_anchor, @@ -796,8 +793,6 @@ pub fn to_substrait_agg_measure( None => None } }) - } - } } Expr::Alias(Alias{expr,..})=> { From 1d3bdbe14fd01bbd03abeb57d3ec9bc155a36d71 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Aug 2024 20:16:36 -0400 Subject: [PATCH 212/357] Minor: add ticket reference and fmt (#11805) --- datafusion/proto/src/physical_plan/mod.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index fbb9e442980b1..aefa1d87a2784 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -489,7 +489,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { // TODO: approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. 
// TODO: `order by` is not supported for UDAF yet - AggregateExprBuilder::new(agg_udf, input_phy_expr).schema(Arc::clone(&physical_schema)).name(name).with_ignore_nulls(agg_node.ignore_nulls).with_distinct(agg_node.distinct).build() + // https://github.com/apache/datafusion/issues/11804 + AggregateExprBuilder::new(agg_udf, input_phy_expr) + .schema(Arc::clone(&physical_schema)) + .name(name) + .with_ignore_nulls(agg_node.ignore_nulls) + .with_distinct(agg_node.distinct) + .build() } } }).transpose()?.ok_or_else(|| { From 336c15e759c334debb7834088af698ec6b03763a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 06:51:10 -0400 Subject: [PATCH 213/357] Improve MSRV CI check to print out problems to log (#11789) * Improve MSRV CI check to print out problems to log * Add reproduction instructions * Use `verify` and add `output-format` option * Update .github/workflows/rust.yml Co-authored-by: Eduard Karacharov --------- Co-authored-by: Eduard Karacharov --- .github/workflows/rust.yml | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index fd8c2d2090b92..00d31765e77de 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -566,18 +566,32 @@ jobs: - name: Check datafusion working-directory: datafusion/core run: | - # If you encounter an error with any of the commands below - # it means some crate in your dependency tree has a higher - # MSRV (Min Supported Rust Version) than the one specified - # in the `rust-version` key of `Cargo.toml`. Check your - # dependencies or update the version in `Cargo.toml` - cargo msrv verify + # If you encounter an error with any of the commands below it means + # your code or some crate in the dependency tree has a higher MSRV + # (Min Supported Rust Version) than the one specified in the + # `rust-version` key of `Cargo.toml`. + # + # To reproduce: + # 1. Install the version of Rust that is failing. Example: + # rustup install 1.76.0 + # 2. Run the command that failed with that version. Example: + # cargo +1.76.0 check -p datafusion + # + # To resolve, either: + # 1. Change your code to use older Rust features, + # 2. Revert dependency update + # 3. Update the MSRV version in `Cargo.toml` + # + # Please see the DataFusion Rust Version Compatibility Policy before + # updating Cargo.toml. You may have to update the code instead. 
+ # https://github.com/apache/datafusion/blob/main/README.md#rust-version-compatibility-policy + cargo msrv --output-format json --log-target stdout verify - name: Check datafusion-substrait working-directory: datafusion/substrait - run: cargo msrv verify + run: cargo msrv --output-format json --log-target stdout verify - name: Check datafusion-proto working-directory: datafusion/proto - run: cargo msrv verify + run: cargo msrv --output-format json --log-target stdout verify - name: Check datafusion-cli working-directory: datafusion-cli - run: cargo msrv verify \ No newline at end of file + run: cargo msrv --output-format json --log-target stdout verify \ No newline at end of file From c340b6ab7a2875c6924f825782f0c5ea408aff3d Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Mon, 5 Aug 2024 13:55:18 +0300 Subject: [PATCH 214/357] Skipping partial aggregation when it is not helping for high cardinality aggregates (#11627) * rfc: optional skipping partial aggregation * benchmarks for convert_to_state * speeding up conversion to state * Fix MSRV error on 1.76.0 * Improve aggregatation documentation --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/config.rs | 9 + datafusion/expr/src/accumulator.rs | 105 +++++- datafusion/expr/src/groups_accumulator.rs | 66 +++- datafusion/expr/src/udaf.rs | 4 +- datafusion/functions-aggregate/Cargo.toml | 10 + .../functions-aggregate/benches/count.rs | 98 ++++++ datafusion/functions-aggregate/benches/sum.rs | 106 ++++++ datafusion/functions-aggregate/src/count.rs | 66 ++++ .../aggregate/groups_accumulator/prim_op.rs | 61 +++- .../physical-plan/src/aggregates/mod.rs | 203 ++++++++++- .../physical-plan/src/aggregates/row_hash.rs | 218 +++++++++++- .../test_files/aggregate_skip_partial.slt | 324 ++++++++++++++++++ .../test_files/information_schema.slt | 4 + docs/source/user-guide/configs.md | 2 + 14 files changed, 1261 insertions(+), 15 deletions(-) create mode 100644 datafusion/functions-aggregate/benches/count.rs create mode 100644 datafusion/functions-aggregate/benches/sum.rs create mode 100644 datafusion/sqllogictest/test_files/aggregate_skip_partial.slt diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 9f8aa1cbdcaae..b5204b343f055 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -324,6 +324,15 @@ config_namespace! { /// Should DataFusion keep the columns used for partition_by in the output RecordBatches pub keep_partition_by_columns: bool, default = false + + /// Aggregation ratio (number of distinct groups / number of input rows) + /// threshold for skipping partial aggregation. 
If the value is greater + /// then partial aggregation will skip aggregation for further input + pub skip_partial_aggregation_probe_ratio_threshold: f64, default = 0.8 + + /// Number of input rows partial aggregation partition should process, before + /// aggregation ratio check and trying to switch to skipping aggregation mode + pub skip_partial_aggregation_probe_rows_threshold: usize, default = 100_000 } } diff --git a/datafusion/expr/src/accumulator.rs b/datafusion/expr/src/accumulator.rs index f9af7850cb924..262646d8ba3ae 100644 --- a/datafusion/expr/src/accumulator.rs +++ b/datafusion/expr/src/accumulator.rs @@ -94,7 +94,7 @@ pub trait Accumulator: Send + Sync + Debug { /// /// Intermediate state is used for "multi-phase" grouping in /// DataFusion, where an aggregate is computed in parallel with - /// multiple `Accumulator` instances, as illustrated below: + /// multiple `Accumulator` instances, as described below: /// /// # MultiPhase Grouping /// @@ -130,7 +130,7 @@ pub trait Accumulator: Send + Sync + Debug { /// `───────' `───────' /// ``` /// - /// The partial state is serialied as `Arrays` and then combined + /// The partial state is serialized as `Arrays` and then combined /// with other partial states from different instances of this /// Accumulator (that ran on different partitions, for example). /// @@ -147,6 +147,107 @@ pub trait Accumulator: Send + Sync + Debug { /// Note that [`ScalarValue::List`] can be used to pass multiple /// values if the number of intermediate values is not known at /// planning time (e.g. for `MEDIAN`) + /// + /// # Multi-phase repartitioned Grouping + /// + /// Many multi-phase grouping plans contain a Repartition operation + /// as well as shown below: + /// + /// ```text + /// ▲ ▲ + /// │ │ + /// │ │ + /// │ │ + /// │ │ + /// │ │ + /// ┌───────────────────────┐ ┌───────────────────────┐ 4. Each AggregateMode::Final + /// │GroupBy │ │GroupBy │ GroupBy has an entry for its + /// │(AggregateMode::Final) │ │(AggregateMode::Final) │ subset of groups (in this case + /// │ │ │ │ that means half the entries) + /// └───────────────────────┘ └───────────────────────┘ + /// ▲ ▲ + /// │ │ + /// └─────────────┬────────────┘ + /// │ + /// │ + /// │ + /// ┌─────────────────────────┐ 3. Repartitioning by hash(group + /// │ Repartition │ keys) ensures that each distinct + /// │ HASH(x) │ group key now appears in exactly + /// └─────────────────────────┘ one partition + /// ▲ + /// │ + /// ┌───────────────┴─────────────┐ + /// │ │ + /// │ │ + /// ┌─────────────────────────┐ ┌──────────────────────────┐ 2. Each AggregateMode::Partial + /// │ GroubyBy │ │ GroubyBy │ GroupBy has an entry for *all* + /// │(AggregateMode::Partial) │ │ (AggregateMode::Partial) │ the groups + /// └─────────────────────────┘ └──────────────────────────┘ + /// ▲ ▲ + /// │ ┌┘ + /// │ │ + /// .─────────. .─────────. + /// ,─' '─. ,─' '─. + /// ; Input : ; Input : 1. Since input data is + /// : Partition 0 ; : Partition 1 ; arbitrarily or RoundRobin + /// ╲ ╱ ╲ ╱ distributed, each partition + /// '─. ,─' '─. ,─' likely has all distinct + /// `───────' `───────' + /// ``` + /// + /// This structure is used so that the `AggregateMode::Partial` accumulators + /// reduces the cardinality of the input as soon as possible. Typically, + /// each partial accumulator sees all groups in the input as the group keys + /// are evenly distributed across the input. 
+ /// + /// The final output is computed by repartitioning the result of + /// [`Self::state`] from each Partial aggregate and `hash(group keys)` so + /// that each distinct group key appears in exactly one of the + /// `AggregateMode::Final` GroupBy nodes. The output of the final nodes are + /// then unioned together to produce the overall final output. + /// + /// Here is an example that shows the distribution of groups in the + /// different phases + /// + /// ```text + /// ┌─────┐ ┌─────┐ + /// │ 1 │ │ 3 │ + /// ├─────┤ ├─────┤ + /// │ 2 │ │ 4 │ After repartitioning by + /// └─────┘ └─────┘ hash(group keys), each distinct + /// ┌─────┐ ┌─────┐ group key now appears in exactly + /// │ 1 │ │ 3 │ one partition + /// ├─────┤ ├─────┤ + /// │ 2 │ │ 4 │ + /// └─────┘ └─────┘ + /// + /// + /// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ + /// + /// ┌─────┐ ┌─────┐ + /// │ 2 │ │ 2 │ + /// ├─────┤ ├─────┤ + /// │ 1 │ │ 2 │ + /// ├─────┤ ├─────┤ + /// │ 3 │ │ 3 │ + /// ├─────┤ ├─────┤ + /// │ 4 │ │ 1 │ + /// └─────┘ └─────┘ Input data is arbitrarily or + /// ... ... RoundRobin distributed, each + /// ┌─────┐ ┌─────┐ partition likely has all + /// │ 1 │ │ 4 │ distinct group keys + /// ├─────┤ ├─────┤ + /// │ 4 │ │ 3 │ + /// ├─────┤ ├─────┤ + /// │ 1 │ │ 1 │ + /// ├─────┤ ├─────┤ + /// │ 4 │ │ 3 │ + /// └─────┘ └─────┘ + /// + /// group values group values + /// in partition 0 in partition 1 + /// ``` fn state(&mut self) -> Result>; /// Updates the accumulator's state from an `Array` containing one diff --git a/datafusion/expr/src/groups_accumulator.rs b/datafusion/expr/src/groups_accumulator.rs index 0d57c403bbe0b..886bd8443e4d3 100644 --- a/datafusion/expr/src/groups_accumulator.rs +++ b/datafusion/expr/src/groups_accumulator.rs @@ -18,7 +18,7 @@ //! Vectorized [`GroupsAccumulator`] use arrow_array::{ArrayRef, BooleanArray}; -use datafusion_common::Result; +use datafusion_common::{not_impl_err, Result}; /// Describes how many rows should be emitted during grouping. #[derive(Debug, Clone, Copy)] @@ -128,6 +128,9 @@ pub trait GroupsAccumulator: Send { /// Returns the intermediate aggregate state for this accumulator, /// used for multi-phase grouping, resetting its internal state. /// + /// See [`Accumulator::state`] for more information on multi-phase + /// aggregation. + /// /// For example, `AVG` might return two arrays: `SUM` and `COUNT` /// but the `MIN` aggregate would just return a single array. /// @@ -135,11 +138,13 @@ pub trait GroupsAccumulator: Send { /// single `StructArray` rather than multiple arrays. /// /// See [`Self::evaluate`] for details on the required output - /// order and `emit_to`. + /// order and `emit_to`. + /// + /// [`Accumulator::state`]: crate::Accumulator::state fn state(&mut self, emit_to: EmitTo) -> Result>; /// Merges intermediate state (the output from [`Self::state`]) - /// into this accumulator's values. + /// into this accumulator's current state. /// /// For some aggregates (such as `SUM`), `merge_batch` is the same /// as `update_batch`, but for some aggregates (such as `COUNT`, @@ -158,8 +163,59 @@ pub trait GroupsAccumulator: Send { total_num_groups: usize, ) -> Result<()>; + /// Converts an input batch directly the intermediate aggregate state. + /// + /// This is the equivalent of treating each input row as its own group. 
It + /// is invoked when the Partial phase of a multi-phase aggregation is not + /// reducing the cardinality enough to warrant spending more effort on + /// pre-aggregation (see `Background` section below), and switches to + /// passing intermediate state directly on to the next aggregation phase. + /// + /// Examples: + /// * `COUNT`: an array of 1s for each row in the input batch. + /// * `SUM/MIN/MAX`: the input values themselves. + /// + /// # Arguments + /// * `values`: the input arguments to the accumulator + /// * `opt_filter`: if present, any row where `opt_filter[i]` is false should be ignored + /// + /// # Background + /// + /// In a multi-phase aggregation (see [`Accumulator::state`]), the initial + /// Partial phase reduces the cardinality of the input data as soon as + /// possible in the plan. + /// + /// This strategy is very effective for queries with a small number of + /// groups, as most of the data is aggregated immediately and only a small + /// amount of data must be repartitioned (see [`Accumulator::state`] for + /// background) + /// + /// However, for queries with a large number of groups, the Partial phase + /// often does not reduce the cardinality enough to warrant the memory and + /// CPU cost of actually performing the aggregation. For such cases, the + /// HashAggregate operator will dynamically switch to passing intermediate + /// state directly to the next aggregation phase with minimal processing + /// using this method. + /// + /// [`Accumulator::state`]: crate::Accumulator::state + fn convert_to_state( + &self, + _values: &[ArrayRef], + _opt_filter: Option<&BooleanArray>, + ) -> Result> { + not_impl_err!("Input batch conversion to state not implemented") + } + + /// Returns `true` if [`Self::convert_to_state`] is implemented to support + /// intermediate aggregate state conversion. + fn supports_convert_to_state(&self) -> bool { + false + } + /// Amount of memory used to store the state of this accumulator, - /// in bytes. This function is called once per batch, so it should - /// be `O(n)` to compute, not `O(num_groups)` + /// in bytes. + /// + /// This function is called once per batch, so it should be `O(n)` to + /// compute, not `O(num_groups)` fn size(&self) -> usize; } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index f5eeef6b53bbe..3a292b2b49bfb 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -351,6 +351,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Return the fields used to store the intermediate state of this accumulator. /// + /// See [`Accumulator::state`] for background information. + /// /// args: [`StateFieldsArgs`] contains arguments passed to the /// aggregate function's accumulator. /// @@ -388,7 +390,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// # Notes /// /// Even if this function returns true, DataFusion will still use - /// `Self::accumulator` for certain queries, such as when this aggregate is + /// [`Self::accumulator`] for certain queries, such as when this aggregate is /// used as a window function or when there no GROUP BY columns in the /// query. 
fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool { diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 43ddd37cfb6ff..4f2bd864832e3 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -50,4 +50,14 @@ paste = "1.0.14" sqlparser = { workspace = true } [dev-dependencies] +arrow = { workspace = true, features = ["test_utils"] } +criterion = "0.5" rand = { workspace = true } + +[[bench]] +name = "count" +harness = false + +[[bench]] +name = "sum" +harness = false diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs new file mode 100644 index 0000000000000..875112ca8d47d --- /dev/null +++ b/datafusion/functions-aggregate/benches/count.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, BooleanArray}; +use arrow::datatypes::Int32Type; +use arrow::util::bench_util::{create_boolean_array, create_primitive_array}; +use arrow_schema::{DataType, Field, Schema}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::DFSchema; +use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; +use datafusion_functions_aggregate::count::Count; +use std::sync::Arc; + +fn prepare_accumulator() -> Box { + let schema = Arc::new(Schema::new(vec![Field::new("f", DataType::Int32, true)])); + let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); + let accumulator_args = AccumulatorArgs { + data_type: &DataType::Int64, + schema: &schema, + dfschema: &df_schema, + ignore_nulls: false, + sort_exprs: &[], + is_reversed: false, + name: "COUNT(f)", + is_distinct: false, + input_types: &[DataType::Int32], + input_exprs: &[datafusion_expr::col("f")], + }; + let count_fn = Count::new(); + + count_fn + .create_groups_accumulator(accumulator_args) + .unwrap() +} + +fn convert_to_state_bench( + c: &mut Criterion, + name: &str, + values: ArrayRef, + opt_filter: Option<&BooleanArray>, +) { + let accumulator = prepare_accumulator(); + c.bench_function(name, |b| { + b.iter(|| { + black_box( + accumulator + .convert_to_state(&[values.clone()], opt_filter) + .unwrap(), + ) + }) + }); +} + +fn count_benchmark(c: &mut Criterion) { + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + convert_to_state_bench(c, "count convert state no nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + convert_to_state_bench(c, "count convert state 30% nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + convert_to_state_bench(c, "count convert state 70% 
nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "count convert state no nulls, filter", + values, + Some(&filter), + ); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "count convert state nulls, filter", + values, + Some(&filter), + ); +} + +criterion_group!(benches, count_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions-aggregate/benches/sum.rs b/datafusion/functions-aggregate/benches/sum.rs new file mode 100644 index 0000000000000..dfaa93cdeff76 --- /dev/null +++ b/datafusion/functions-aggregate/benches/sum.rs @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, BooleanArray}; +use arrow::datatypes::Int64Type; +use arrow::util::bench_util::{create_boolean_array, create_primitive_array}; +use arrow_schema::{DataType, Field, Schema}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::DFSchema; +use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; +use datafusion_functions_aggregate::sum::Sum; +use std::sync::Arc; + +fn prepare_accumulator(data_type: &DataType) -> Box { + let schema = Arc::new(Schema::new(vec![Field::new("f", data_type.clone(), true)])); + let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); + let accumulator_args = AccumulatorArgs { + data_type, + schema: &schema, + dfschema: &df_schema, + ignore_nulls: false, + sort_exprs: &[], + is_reversed: false, + name: "SUM(f)", + is_distinct: false, + input_types: &[data_type.clone()], + input_exprs: &[datafusion_expr::col("f")], + }; + let sum_fn = Sum::new(); + + sum_fn.create_groups_accumulator(accumulator_args).unwrap() +} + +fn convert_to_state_bench( + c: &mut Criterion, + name: &str, + values: ArrayRef, + opt_filter: Option<&BooleanArray>, +) { + let accumulator = prepare_accumulator(values.data_type()); + c.bench_function(name, |b| { + b.iter(|| { + black_box( + accumulator + .convert_to_state(&[values.clone()], opt_filter) + .unwrap(), + ) + }) + }); +} + +fn count_benchmark(c: &mut Criterion) { + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + convert_to_state_bench(c, "sum i64 convert state no nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + convert_to_state_bench( + c, + "sum i64 convert state 30% nulls, no filter", + values, + None, + ); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + convert_to_state_bench( + c, + "sum i64 convert state 
70% nulls, no filter", + values, + None, + ); + + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "sum i64 convert state no nulls, filter", + values, + Some(&filter), + ); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "sum i64 convert state nulls, filter", + values, + Some(&filter), + ); +} + +criterion_group!(benches, count_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 64eb7253f5c9d..aea05442536ee 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -23,6 +23,7 @@ use std::{fmt::Debug, sync::Arc}; use arrow::{ array::{ArrayRef, AsArray}, + compute, datatypes::{ DataType, Date32Type, Date64Type, Decimal128Type, Decimal256Type, Field, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, @@ -440,6 +441,71 @@ impl GroupsAccumulator for CountGroupsAccumulator { Ok(vec![Arc::new(counts) as ArrayRef]) } + /// Converts an input batch directly to a state batch + /// + /// The state of `COUNT` is always a single Int64Array: + /// * `1` (for non-null, non filtered values) + /// * `0` (for null values) + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + let values = &values[0]; + + let state_array = match (values.logical_nulls(), opt_filter) { + (None, None) => { + // In case there is no nulls in input and no filter, returning array of 1 + Arc::new(Int64Array::from_value(1, values.len())) + } + (Some(nulls), None) => { + // If there are any nulls in input values -- casting `nulls` (true for values, false for nulls) + // of input array to Int64 + let nulls = BooleanArray::new(nulls.into_inner(), None); + compute::cast(&nulls, &DataType::Int64)? + } + (None, Some(filter)) => { + // If there is only filter + // - applying filter null mask to filter values by bitand filter values and nulls buffers + // (using buffers guarantees absence of nulls in result) + // - casting result of bitand to Int64 array + let (filter_values, filter_nulls) = filter.clone().into_parts(); + + let state_buf = match filter_nulls { + Some(filter_nulls) => &filter_values & filter_nulls.inner(), + None => filter_values, + }; + + let boolean_state = BooleanArray::new(state_buf, None); + compute::cast(&boolean_state, &DataType::Int64)? + } + (Some(nulls), Some(filter)) => { + // For both input nulls and filter + // - applying filter null mask to filter values by bitand filter values and nulls buffers + // (using buffers guarantees absence of nulls in result) + // - applying values null mask to filter buffer by another bitand on filter result and + // nulls from input values + // - casting result to Int64 array + let (filter_values, filter_nulls) = filter.clone().into_parts(); + + let filter_buf = match filter_nulls { + Some(filter_nulls) => &filter_values & filter_nulls.inner(), + None => filter_values, + }; + let state_buf = &filter_buf & nulls.inner(); + + let boolean_state = BooleanArray::new(state_buf, None); + compute::cast(&boolean_state, &DataType::Int64)? 
+ } + }; + + Ok(vec![state_array]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + fn size(&self) -> usize { self.counts.capacity() * std::mem::size_of::() } diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs index debb36852b224..8d69646bd422a 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs @@ -18,9 +18,11 @@ use std::sync::Arc; use arrow::array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray}; +use arrow::buffer::NullBuffer; +use arrow::compute; use arrow::datatypes::ArrowPrimitiveType; use arrow::datatypes::DataType; -use datafusion_common::Result; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; use datafusion_expr::{EmitTo, GroupsAccumulator}; use super::accumulate::NullState; @@ -134,6 +136,63 @@ where self.update_batch(values, group_indices, opt_filter, total_num_groups) } + /// Converts an input batch directly to a state batch + /// + /// The state is: + /// - self.prim_fn for all non null, non filtered values + /// - null otherwise + /// + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + let values = values[0].as_primitive::().clone(); + + // Initializing state with starting values + let initial_state = + PrimitiveArray::::from_value(self.starting_value, values.len()); + + // Recalculating values in case there is filter + let values = match opt_filter { + None => values, + Some(filter) => { + let (filter_values, filter_nulls) = filter.clone().into_parts(); + // Calculating filter mask as a result of bitand of filter, and converting it to null buffer + let filter_bool = match filter_nulls { + Some(filter_nulls) => filter_nulls.inner() & &filter_values, + None => filter_values, + }; + let filter_nulls = NullBuffer::from(filter_bool); + + // Rebuilding input values with a new nulls mask, which is equal to + // the union of original nulls and filter mask + let (dt, values_buf, original_nulls) = values.clone().into_parts(); + let nulls_buf = + NullBuffer::union(original_nulls.as_ref(), Some(&filter_nulls)); + PrimitiveArray::::new(values_buf, nulls_buf).with_data_type(dt) + } + }; + + let state_values = compute::binary_mut(initial_state, &values, |mut x, y| { + (self.prim_fn)(&mut x, y); + x + }); + let state_values = state_values + .map_err(|_| { + internal_datafusion_err!( + "initial_values underlying buffer must not be shared" + ) + })? + .map_err(DataFusionError::from)?; + + Ok(vec![Arc::new(state_values)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + fn size(&self) -> usize { self.values.capacity() * std::mem::size_of::() + self.null_state.size() } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 43f9f98283bb1..8941418c12e1e 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -56,11 +56,20 @@ mod topk; mod topk_stream; /// Hash aggregate modes +/// +/// See [`Accumulator::state`] for background information on multi-phase +/// aggregation and how these modes are used. 
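The `convert_to_state` implementations above, for COUNT and for the primitive accumulators, come down to bitwise operations on arrow validity and filter buffers followed by a cast. A minimal self-contained sketch of the COUNT case with input nulls and a filter, using the arrow crate directly; the literal values and the `main` wrapper are illustrative only, and filter nulls are ignored for brevity:

    use arrow::array::{Array, BooleanArray, Int32Array, Int64Array};
    use arrow::compute;
    use arrow::datatypes::DataType;

    fn main() -> Result<(), arrow::error::ArrowError> {
        // A column with one null, plus a filter: COUNT's per-row state is 1 only
        // where the value is non-null AND the filter is true, otherwise 0.
        let values = Int32Array::from(vec![Some(1), None, Some(3), Some(4)]);
        let filter = BooleanArray::from(vec![true, true, false, true]);

        let valid = values.nulls().unwrap().inner();   // validity bits: 1 0 1 1
        let (filter_values, _) = filter.into_parts();  // filter bits:   1 1 0 1
        let state_bits = valid & &filter_values;       // bitwise AND:   1 0 0 1

        let state = compute::cast(&BooleanArray::new(state_bits, None), &DataType::Int64)?;
        let state = state.as_any().downcast_ref::<Int64Array>().unwrap();
        let as_vec: Vec<i64> = (0..state.len()).map(|i| state.value(i)).collect();
        assert_eq!(as_vec, vec![1, 0, 0, 1]);
        Ok(())
    }

The bitwise AND of the validity and filter bitmaps corresponds to the `state_buf` computed in the `(Some(nulls), Some(filter))` arm of the COUNT implementation above.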
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum AggregateMode { - /// Partial aggregate that can be applied in parallel across input partitions + /// Partial aggregate that can be applied in parallel across input + /// partitions. + /// + /// This is the first phase of a multi-phase aggregation. Partial, - /// Final aggregate that produces a single partition of output + /// Final aggregate that produces a single partition of output by combining + /// the output of multiple partial aggregates. + /// + /// This is the second phase of a multi-phase aggregation. Final, /// Final aggregate that works on pre-partitioned data. /// @@ -72,12 +81,15 @@ pub enum AggregateMode { /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using /// two operators. + /// /// This mode requires that the input is a single partition (like Final) Single, /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using /// two operators. - /// This mode requires that the input is partitioned by group key (like FinalPartitioned) + /// + /// This mode requires that the input is partitioned by group key (like + /// FinalPartitioned) SinglePartitioned, } @@ -2395,4 +2407,189 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_skip_aggregation_after_first_batch() -> Result<()> { + let schema = Arc::new(Schema::new(vec![ + Field::new("key", DataType::Int32, true), + Field::new("val", DataType::Int32, true), + ])); + let df_schema = DFSchema::try_from(Arc::clone(&schema))?; + + let group_by = + PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); + + let aggr_expr: Vec> = + vec![create_aggregate_expr_with_dfschema( + &count_udaf(), + &[col("val", &schema)?], + &[datafusion_expr::col("val")], + &[], + &[], + &df_schema, + "COUNT(val)", + false, + false, + false, + )?]; + + let input_data = vec![ + RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![0, 0, 0])), + ], + ) + .unwrap(), + RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![2, 3, 4])), + Arc::new(Int32Array::from(vec![0, 0, 0])), + ], + ) + .unwrap(), + ]; + + let input = Arc::new(MemoryExec::try_new( + &[input_data], + Arc::clone(&schema), + None, + )?); + let aggregate_exec = Arc::new(AggregateExec::try_new( + AggregateMode::Partial, + group_by, + aggr_expr, + vec![None], + Arc::clone(&input) as Arc, + schema, + )?); + + let mut session_config = SessionConfig::default(); + session_config = session_config.set( + "datafusion.execution.skip_partial_aggregation_probe_rows_threshold", + ScalarValue::Int64(Some(2)), + ); + session_config = session_config.set( + "datafusion.execution.skip_partial_aggregation_probe_ratio_threshold", + ScalarValue::Float64(Some(0.1)), + ); + + let ctx = TaskContext::default().with_session_config(session_config); + let output = collect(aggregate_exec.execute(0, Arc::new(ctx))?).await?; + + let expected = [ + "+-----+-------------------+", + "| key | COUNT(val)[count] |", + "+-----+-------------------+", + "| 1 | 1 |", + "| 2 | 1 |", + "| 3 | 1 |", + "| 2 | 1 |", + "| 3 | 1 |", + "| 4 | 1 |", + "+-----+-------------------+", + ]; + assert_batches_eq!(expected, &output); + + Ok(()) + } + + #[tokio::test] + async fn test_skip_aggregation_after_threshold() -> Result<()> { + let schema = 
Arc::new(Schema::new(vec![ + Field::new("key", DataType::Int32, true), + Field::new("val", DataType::Int32, true), + ])); + let df_schema = DFSchema::try_from(Arc::clone(&schema))?; + + let group_by = + PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); + + let aggr_expr: Vec> = + vec![create_aggregate_expr_with_dfschema( + &count_udaf(), + &[col("val", &schema)?], + &[datafusion_expr::col("val")], + &[], + &[], + &df_schema, + "COUNT(val)", + false, + false, + false, + )?]; + + let input_data = vec![ + RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![0, 0, 0])), + ], + ) + .unwrap(), + RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![2, 3, 4])), + Arc::new(Int32Array::from(vec![0, 0, 0])), + ], + ) + .unwrap(), + RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![2, 3, 4])), + Arc::new(Int32Array::from(vec![0, 0, 0])), + ], + ) + .unwrap(), + ]; + + let input = Arc::new(MemoryExec::try_new( + &[input_data], + Arc::clone(&schema), + None, + )?); + let aggregate_exec = Arc::new(AggregateExec::try_new( + AggregateMode::Partial, + group_by, + aggr_expr, + vec![None], + Arc::clone(&input) as Arc, + schema, + )?); + + let mut session_config = SessionConfig::default(); + session_config = session_config.set( + "datafusion.execution.skip_partial_aggregation_probe_rows_threshold", + ScalarValue::Int64(Some(5)), + ); + session_config = session_config.set( + "datafusion.execution.skip_partial_aggregation_probe_ratio_threshold", + ScalarValue::Float64(Some(0.1)), + ); + + let ctx = TaskContext::default().with_session_config(session_config); + let output = collect(aggregate_exec.execute(0, Arc::new(ctx))?).await?; + + let expected = [ + "+-----+-------------------+", + "| key | COUNT(val)[count] |", + "+-----+-------------------+", + "| 1 | 1 |", + "| 2 | 2 |", + "| 3 | 2 |", + "| 4 | 1 |", + "| 2 | 1 |", + "| 3 | 1 |", + "| 4 | 1 |", + "+-----+-------------------+", + ]; + assert_batches_eq!(expected, &output); + + Ok(()) + } } diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 167ca72407503..62ed79dad4aa8 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -39,7 +39,7 @@ use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; use arrow::datatypes::SchemaRef; use arrow_schema::SortOptions; -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; @@ -62,6 +62,12 @@ pub(crate) enum ExecutionState { /// When producing output, the remaining rows to output are stored /// here and are sliced off as needed in batch_size chunks ProducingOutput(RecordBatch), + /// Produce intermediate aggregate state for each input row without + /// aggregation. 
+ /// + /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] + SkippingAggregation, + /// All input has been consumed and all groups have been emitted Done, } @@ -90,6 +96,72 @@ struct SpillState { merging_group_by: PhysicalGroupBy, } +/// Tracks if the aggregate should skip partial aggregations +/// +/// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] +struct SkipAggregationProbe { + /// Number of processed input rows + input_rows: usize, + /// Number of total group values for `input_rows` + num_groups: usize, + + /// Aggregation ratio check should be performed only when the + /// number of input rows exceeds this threshold + probe_rows_threshold: usize, + /// Maximum allowed value of `input_rows` / `num_groups` to + /// continue aggregation + probe_ratio_threshold: f64, + + /// Flag indicating that further data aggregation mey be skipped + should_skip: bool, + /// Flag indicating that further updates of `SkipAggregationProbe` + /// state won't make any effect + is_locked: bool, +} + +impl SkipAggregationProbe { + fn new(probe_rows_threshold: usize, probe_ratio_threshold: f64) -> Self { + Self { + input_rows: 0, + num_groups: 0, + probe_rows_threshold, + probe_ratio_threshold, + should_skip: false, + is_locked: false, + } + } + + /// Updates `SkipAggregationProbe` state: + /// - increments the number of input rows + /// - replaces the number of groups with the new value + /// - on `probe_rows_threshold` exceeded calculates + /// aggregation ratio and sets `should_skip` flag + /// - if `should_skip` is set, locks further state updates + fn update_state(&mut self, input_rows: usize, num_groups: usize) { + if self.is_locked { + return; + } + self.input_rows += input_rows; + self.num_groups = num_groups; + if self.input_rows >= self.probe_rows_threshold { + self.should_skip = self.num_groups as f64 / self.input_rows as f64 + >= self.probe_ratio_threshold; + self.is_locked = true; + } + } + + fn should_skip(&self) -> bool { + self.should_skip + } + + /// Provides an ability to externally set `should_skip` flag + /// to `false` and prohibit further state updates + fn forbid_skipping(&mut self) { + self.should_skip = false; + self.is_locked = true; + } +} + /// HashTable based Grouping Aggregator /// /// # Design Goals @@ -137,7 +209,7 @@ struct SpillState { /// of `x` and one accumulator for `SUM(y)`, specialized for the data /// type of `y`. /// -/// # Description +/// # Discussion /// /// [`group_values`] does not store any aggregate state inline. It only /// assigns "group indices", one for each (distinct) group value. The @@ -155,7 +227,25 @@ struct SpillState { /// /// [`group_values`]: Self::group_values /// -/// # Spilling +/// # Partial Aggregate and multi-phase grouping +/// +/// As described on [`Accumulator::state`], this operator is used in the context +/// "multi-phase" grouping when the mode is [`AggregateMode::Partial`]. +/// +/// An important optimization for multi-phase partial aggregation is to skip +/// partial aggregation when it is not effective enough to warrant the memory or +/// CPU cost, as is often the case for queries many distinct groups (high +/// cardinality group by). Memory is particularly important because each Partial +/// aggregator must store the intermediate state for each group. 
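As a rough, standalone sketch of the decision rule implemented by `SkipAggregationProbe::update_state` above (not the actual struct; the default thresholds of 100000 rows and a 0.8 ratio are the configuration defaults added later in this patch):

    fn should_skip_partial(input_rows: usize, num_groups: usize) -> bool {
        let probe_rows_threshold = 100_000; // skip_partial_aggregation_probe_rows_threshold
        let probe_ratio_threshold = 0.8;    // skip_partial_aggregation_probe_ratio_threshold
        // Decide only after enough rows have been seen, then compare the
        // distinct-groups / input-rows ratio against the threshold.
        input_rows >= probe_rows_threshold
            && (num_groups as f64 / input_rows as f64) >= probe_ratio_threshold
    }

    fn main() {
        // High cardinality: 85k groups out of 100k rows, so partial aggregation
        // buys little and the stream switches to SkippingAggregation.
        assert!(should_skip_partial(100_000, 85_000));
        // Low cardinality: grouping is effective, keep aggregating.
        assert!(!should_skip_partial(100_000, 10_000));
    }

Once the rows threshold is reached the probe locks its decision, so a later drop in cardinality does not re-enable partial aggregation.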
+/// +/// If the ratio of the number of groups to the number of input rows exceeds a +/// threshold, and [`GroupsAccumulator::supports_convert_to_state`] is +/// supported, this operator will stop applying Partial aggregation and directly +/// pass the input rows to the next aggregation phase. +/// +/// [`Accumulator::state`]: datafusion_expr::Accumulator::state +/// +/// # Spilling (to disk) /// /// The sizes of group values and accumulators can become large. Before that causes out of memory, /// this hash aggregator outputs partial states early for partial aggregation or spills to local @@ -275,6 +365,10 @@ pub(crate) struct GroupedHashAggregateStream { /// the `GroupedHashAggregateStream` operation immediately switches to /// output mode and emits all groups. group_values_soft_limit: Option, + + /// Optional probe for skipping data aggregation, if supported by + /// current stream. + skip_aggregation_probe: Option, } impl GroupedHashAggregateStream { @@ -365,6 +459,36 @@ impl GroupedHashAggregateStream { merging_group_by: PhysicalGroupBy::new_single(agg_group_by.expr.clone()), }; + // Skip aggregation is supported if: + // - aggregation mode is Partial + // - input is not ordered by GROUP BY expressions, + // since Final mode expects unique group values as its input + // - all accumulators support input batch to intermediate + // aggregate state conversion + // - there is only one GROUP BY expressions set + let skip_aggregation_probe = if agg.mode == AggregateMode::Partial + && matches!(group_ordering, GroupOrdering::None) + && accumulators + .iter() + .all(|acc| acc.supports_convert_to_state()) + && agg_group_by.is_single() + { + Some(SkipAggregationProbe::new( + context + .session_config() + .options() + .execution + .skip_partial_aggregation_probe_rows_threshold, + context + .session_config() + .options() + .execution + .skip_partial_aggregation_probe_ratio_threshold, + )) + } else { + None + }; + Ok(GroupedHashAggregateStream { schema: agg_schema, input, @@ -384,6 +508,7 @@ impl GroupedHashAggregateStream { runtime: context.runtime_env(), spill_state, group_values_soft_limit: agg.limit, + skip_aggregation_probe, }) } } @@ -434,12 +559,16 @@ impl Stream for GroupedHashAggregateStream { // new batch to aggregate Some(Ok(batch)) => { let timer = elapsed_compute.timer(); + let input_rows = batch.num_rows(); + // Make sure we have enough capacity for `batch`, otherwise spill extract_ok!(self.spill_previous_if_necessary(&batch)); // Do the grouping extract_ok!(self.group_aggregate_batch(batch)); + self.update_skip_aggregation_probe(input_rows); + // If we can begin emitting rows, do so, // otherwise keep consuming input assert!(!self.input_done); @@ -463,6 +592,8 @@ impl Stream for GroupedHashAggregateStream { extract_ok!(self.emit_early_if_necessary()); + extract_ok!(self.switch_to_skip_aggregation()); + timer.done(); } Some(Err(e)) => { @@ -476,6 +607,26 @@ impl Stream for GroupedHashAggregateStream { } } + ExecutionState::SkippingAggregation => { + match ready!(self.input.poll_next_unpin(cx)) { + Some(Ok(batch)) => { + let _timer = elapsed_compute.timer(); + let states = self.transform_to_states(batch)?; + return Poll::Ready(Some(Ok( + states.record_output(&self.baseline_metrics) + ))); + } + Some(Err(e)) => { + // inner had error, return to caller + return Poll::Ready(Some(Err(e))); + } + None => { + // inner is done, switching to `Done` state + self.exec_state = ExecutionState::Done; + } + } + } + ExecutionState::ProducingOutput(batch) => { // slice off a part of the batch, if needed 
let output_batch; @@ -484,6 +635,12 @@ impl Stream for GroupedHashAggregateStream { ( if self.input_done { ExecutionState::Done + } else if self + .skip_aggregation_probe + .as_ref() + .is_some_and(|probe| probe.should_skip()) + { + ExecutionState::SkippingAggregation } else { ExecutionState::ReadingInput }, @@ -797,4 +954,59 @@ impl GroupedHashAggregateStream { timer.done(); Ok(()) } + + // Updates skip aggregation probe state. + // In case stream has any spills, the probe is forcefully set to + // forbid aggregation skipping, and locked, since spilling resets + // total number of unique groups. + // + // Note: currently spilling is not supported for Partial aggregation + fn update_skip_aggregation_probe(&mut self, input_rows: usize) { + if let Some(probe) = self.skip_aggregation_probe.as_mut() { + if !self.spill_state.spills.is_empty() { + probe.forbid_skipping(); + } else { + probe.update_state(input_rows, self.group_values.len()); + } + }; + } + + // In case the probe indicates that aggregation may be + // skipped, forces stream to produce currently accumulated output. + fn switch_to_skip_aggregation(&mut self) -> Result<()> { + if let Some(probe) = self.skip_aggregation_probe.as_mut() { + if probe.should_skip() { + let batch = self.emit(EmitTo::All, false)?; + self.exec_state = ExecutionState::ProducingOutput(batch); + } + } + + Ok(()) + } + + // Transforms input batch to intermediate aggregate state, without grouping it + fn transform_to_states(&self, batch: RecordBatch) -> Result { + let group_values = evaluate_group_by(&self.group_by, &batch)?; + let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; + let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; + + let mut output = group_values.first().cloned().ok_or_else(|| { + internal_datafusion_err!("group_values expected to have at least one element") + })?; + + let iter = self + .accumulators + .iter() + .zip(input_values.iter()) + .zip(filter_values.iter()); + + for ((acc, values), opt_filter) in iter { + let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean()); + output.extend(acc.convert_to_state(values, opt_filter)?); + } + + let states_batch = RecordBatch::try_new(self.schema(), output)?; + + Ok(states_batch) + } } diff --git a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt new file mode 100644 index 0000000000000..65efc24ec037c --- /dev/null +++ b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt @@ -0,0 +1,324 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# The main goal of these tests is to verify correctness of transforming +# input values to state by accumulators, supporting `convert_to_state`. 
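Before the test cases below, it may help to spell out what `transform_to_states` emits when aggregation is skipped: each input row becomes its own single-row intermediate state, with the group columns followed by one state column per accumulator. A plain-Rust sketch of the per-row states for `count` and `sum` over the same nullable column (this mirrors the documented behaviour; it is not the DataFusion API):

    fn main() {
        // One nullable Int64 input column, no filter.
        let values: Vec<Option<i64>> = vec![Some(1), None, Some(3)];

        // count state per row: 1 for non-null values, 0 for nulls (never null itself).
        let count_state: Vec<i64> = values
            .iter()
            .map(|v| if v.is_some() { 1 } else { 0 })
            .collect();
        assert_eq!(count_state, vec![1, 0, 1]);

        // sum state per row: the starting value (0) combined with the row's value;
        // null inputs stay null.
        let starting_value = 0i64;
        let sum_state: Vec<Option<i64>> = values
            .iter()
            .map(|v| v.map(|x| starting_value + x))
            .collect();
        assert_eq!(sum_state, vec![Some(1), None, Some(3)]);
    }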
+ + +# Setup test data table +statement ok +CREATE EXTERNAL TABLE aggregate_test_100 ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT, + c5 INT, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 INT UNSIGNED NOT NULL, + c10 BIGINT UNSIGNED NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); + +# Prepare settings to skip partial aggregation from the beginning +statement ok +set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 0; + +statement ok +set datafusion.execution.skip_partial_aggregation_probe_ratio_threshold = 0.0; + +statement ok +set datafusion.execution.target_partitions = 2; + +statement ok +set datafusion.execution.batch_size = 1; + +statement ok +set datafusion.sql_parser.dialect = 'Postgres'; + +# Grouping by unique fields allows to check all accumulators +query ITIIII +SELECT c5, c1, + COUNT(), + COUNT(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END), + COUNT() FILTER (WHERE c1 = 'b'), + COUNT(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END) FILTER (WHERE c1 = 'b') +FROM aggregate_test_100 +GROUP BY 1, 2 ORDER BY 1 LIMIT 5; +---- +-2141999138 c 1 0 0 0 +-2141451704 a 1 1 0 0 +-2138770630 b 1 0 1 0 +-2117946883 d 1 0 0 0 +-2098805236 c 1 0 0 0 + +query ITIIII +SELECT c5, c1, + MIN(c5), + MIN(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END), + MIN(c5) FILTER (WHERE c1 = 'b'), + MIN(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END) FILTER (WHERE c1 = 'b') +FROM aggregate_test_100 +GROUP BY 1, 2 ORDER BY 1 LIMIT 5; +---- +-2141999138 c -2141999138 NULL NULL NULL +-2141451704 a -2141451704 -2141451704 NULL NULL +-2138770630 b -2138770630 NULL -2138770630 NULL +-2117946883 d -2117946883 NULL NULL NULL +-2098805236 c -2098805236 NULL NULL NULL + +query ITIIII +SELECT c5, c1, + MAX(c5), + MAX(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END), + MAX(c5) FILTER (WHERE c1 = 'b'), + MAX(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END) FILTER (WHERE c1 = 'b') +FROM aggregate_test_100 +GROUP BY 1, 2 ORDER BY 1 LIMIT 5; +---- +-2141999138 c -2141999138 NULL NULL NULL +-2141451704 a -2141451704 -2141451704 NULL NULL +-2138770630 b -2138770630 NULL -2138770630 NULL +-2117946883 d -2117946883 NULL NULL NULL +-2098805236 c -2098805236 NULL NULL NULL + +query ITIIII +SELECT c5, c1, + SUM(c5), + SUM(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END), + SUM(c5) FILTER (WHERE c1 = 'b'), + SUM(CASE WHEN c1 = 'a' THEN c5 ELSE NULL END) FILTER (WHERE c1 = 'b') +FROM aggregate_test_100 +GROUP BY 1, 2 ORDER BY 1 LIMIT 5; +---- +-2141999138 c -2141999138 NULL NULL NULL +-2141451704 a -2141451704 -2141451704 NULL NULL +-2138770630 b -2138770630 NULL -2138770630 NULL +-2117946883 d -2117946883 NULL NULL NULL +-2098805236 c -2098805236 NULL NULL NULL + +# Prepare settings to always skip aggregation after couple of batches +statement ok +set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 10; + +statement ok +set datafusion.execution.skip_partial_aggregation_probe_ratio_threshold = 0.0; + +statement ok +set datafusion.execution.target_partitions = 2; + +statement ok +set datafusion.execution.batch_size = 4; + +# Inserting into nullable table with batch_size specified above +# to prevent creation on single in-memory batch +statement ok +CREATE TABLE aggregate_test_100_null ( + c2 TINYINT NOT NULL, + c5 INT NOT NULL, + c3 SMALLINT, + c11 FLOAT +); + +statement ok +INSERT INTO aggregate_test_100_null +SELECT + c2, + c5, + CASE 
WHEN c1 = 'e' THEN NULL ELSE c3 END as c3, + CASE WHEN c1 = 'a' THEN NULL ELSE c11 END as c11 +FROM aggregate_test_100; + +# Test count varchar / int / float +query IIII +SELECT c2, count(c1), count(c5), count(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 22 22 22 +2 22 22 22 +3 19 19 19 +4 23 23 23 +5 14 14 14 + +# Test min / max for int / float +query IIIRR +SELECT c2, min(c5), max(c5), min(c11), max(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 -1991133944 2143473091 0.064453244 0.89651865 +2 -2138770630 2053379412 0.055064857 0.8315913 +3 -2141999138 2030965207 0.034291923 0.9488028 +4 -1885422396 2064155045 0.028003037 0.7459874 +5 -2117946883 2025611582 0.12559289 0.87989986 + +# Test sum for int / float +query IIR +SELECT c2, sum(c5), sum(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 -438598674 12.153253793716 +2 -8259865364 9.577824473381 +3 1956035476 9.590891361237 +4 16155718643 9.531112968922 +5 6449337880 7.074412226677 + +# Test count with nullable fields +query III +SELECT c2, count(c3), count(c11) FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 19 17 +2 17 19 +3 15 13 +4 16 19 +5 12 11 + +# Test min / max with nullable fields +query IIIRR +SELECT c2, min(c3), max(c3), min(c11), max(c11) FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 -99 125 0.064453244 0.89651865 +2 -117 122 0.09683716 0.8315913 +3 -101 123 0.034291923 0.94669616 +4 -117 123 0.028003037 0.7085086 +5 -101 118 0.12559289 0.87989986 + +# Test sum with nullable fields +query IIR +SELECT c2, sum(c3), sum(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 367 12.153253793716 +2 184 9.577824473381 +3 395 9.590891361237 +4 29 9.531112968922 +5 -194 7.074412226677 + +# Enabling PG dialect for filtered aggregates tests +statement ok +set datafusion.sql_parser.dialect = 'Postgres'; + +# Test count with filter +query III +SELECT + c2, + count(c3) FILTER (WHERE c3 > 0), + count(c3) FILTER (WHERE c11 > 10) +FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 13 0 +2 13 0 +3 13 0 +4 13 0 +5 5 0 + +# Test min / max with filter +query III +SELECT + c2, + min(c3) FILTER (WHERE c3 > 0), + max(c3) FILTER (WHERE c3 < 0) +FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 12 -5 +2 1 -29 +3 13 -2 +4 3 -38 +5 36 -5 + +# Test sum with filter +query II +SELECT + c2, + sum(c3) FILTER (WHERE c1 != 'e' AND c3 > 0) +FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; +---- +1 612 +2 565 +3 466 +4 417 +5 284 + +# Test count with nullable fields and filter +query III +SELECT c2, + COUNT(c3) FILTER (WHERE c5 > 0), + COUNT(c11) FILTER(WHERE c5 > 0) +FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 11 6 +2 6 6 +3 8 6 +4 11 14 +5 8 7 + +# Test count with nullable fields and nullable filter +query III +SELECT c2, + COUNT(c3) FILTER (WHERE c11 > 0.5), + COUNT(c11) FILTER(WHERE c3 > 0) +FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 10 9 +2 7 8 +3 3 6 +4 3 7 +5 6 3 + +# Test min / max with nullable fields and filter +query IIIRR +SELECT c2, + MIN(c3) FILTER (WHERE c5 > 0), + MAX(c3) FILTER (WHERE c5 > 0), + MIN(c11) FILTER (WHERE c5 < 0), + MAX(c11) FILTER (WHERE c5 < 0) +FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 -99 103 0.2578469 0.89651865 +2 -48 93 0.09683716 0.8315913 +3 -76 123 0.034291923 0.94669616 +4 -117 123 0.06563997 0.57360977 +5 -94 68 0.12559289 0.75173044 + +# Test min / max with nullable fields and nullable filter +query III +SELECT c2, + MIN(c3) FILTER (WHERE c11 > 0.5), + 
MAX(c3) FILTER (WHERE c11 > 0.5) +FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 -99 125 +2 -106 122 +3 -76 73 +4 -117 47 +5 -82 118 + +# Test sum with nullable field and nullable / non-nullable filters +query IIIRR +SELECT c2, + SUM(c3) FILTER (WHERE c5 > 0), + SUM(c3) FILTER (WHERE c11 < 0.5), + SUM(c11) FILTER (WHERE c5 < 0), + SUM(c11) FILTER (WHERE c3 > 0) +FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; +---- +1 -3 77 7.214695632458 5.085060358047 +2 100 77 6.197732746601 3.150197088718 +3 109 211 2.80575042963 2.80632930994 +4 -171 56 2.10740506649 1.939846396446 +5 -86 -76 1.8741710186 1.600569307804 diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index fef7bfe821744..0cbbbf3c608c0 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -207,6 +207,8 @@ datafusion.execution.parquet.statistics_enabled page datafusion.execution.parquet.write_batch_size 1024 datafusion.execution.parquet.writer_version 1.0 datafusion.execution.planning_concurrency 13 +datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 +datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 datafusion.execution.soft_max_rows_per_output_file 50000000 datafusion.execution.sort_in_place_threshold_bytes 1048576 datafusion.execution.sort_spill_reservation_bytes 10485760 @@ -294,6 +296,8 @@ datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistic datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer version valid values are "1.0" and "2.0" datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system +datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input +datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). 
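The two new options listed above can also be set programmatically. Below is a sketch modelled on the `SessionConfig::set` calls used in the unit tests earlier in this patch; the table name and CSV path are placeholders, a tokio runtime is assumed, and exact import paths may differ between releases:

    use datafusion::common::ScalarValue;
    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> datafusion::error::Result<()> {
        // Lower the probe thresholds so partial aggregation gives up quickly
        // (the defaults are 100000 rows and a 0.8 ratio).
        let config = SessionConfig::new()
            .set(
                "datafusion.execution.skip_partial_aggregation_probe_rows_threshold",
                ScalarValue::Int64(Some(100)),
            )
            .set(
                "datafusion.execution.skip_partial_aggregation_probe_ratio_threshold",
                ScalarValue::Float64(Some(0.1)),
            );
        let ctx = SessionContext::new_with_config(config);
        ctx.register_csv("t", "data/example.csv", CsvReadOptions::new()).await?;
        // Query results are unchanged; only the partial aggregation phase may
        // switch to emitting per-row intermediate states.
        ctx.sql("SELECT c1, count(c2), min(c2), max(c2) FROM t GROUP BY c1")
            .await?
            .show()
            .await?;
        Ok(())
    }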
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 78d0d7b0239ff..badd07822ac2e 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -89,6 +89,8 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.enable_recursive_ctes | true | Should DataFusion support recursive CTEs | | datafusion.execution.split_file_groups_by_statistics | false | Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental | | datafusion.execution.keep_partition_by_columns | false | Should DataFusion keep the columns used for partition_by in the output RecordBatches | +| datafusion.execution.skip_partial_aggregation_probe_ratio_threshold | 0.8 | Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input | +| datafusion.execution.skip_partial_aggregation_probe_rows_threshold | 100000 | Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode | | datafusion.optimizer.enable_distinct_aggregation_soft_limit | true | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. | | datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores | | datafusion.optimizer.enable_topk_aggregation | true | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible | From 682bc2eff2a428482a83fba0f304facb458e4428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 5 Aug 2024 19:00:31 +0800 Subject: [PATCH 215/357] Improve log func stability (#11808) --- datafusion/functions/src/math/log.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 0e181aa612503..ad7cff1f7149f 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -261,10 +261,10 @@ mod tests { .expect("failed to convert result to a Float64Array"); assert_eq!(floats.len(), 4); - assert_eq!(floats.value(0), 3.0); - assert_eq!(floats.value(1), 2.0); - assert_eq!(floats.value(2), 4.0); - assert_eq!(floats.value(3), 4.0); + assert!((floats.value(0) - 3.0).abs() < 1e-10); + assert!((floats.value(1) - 2.0).abs() < 1e-10); + assert!((floats.value(2) - 4.0).abs() < 1e-10); + assert!((floats.value(3) - 4.0).abs() < 1e-10); } ColumnarValue::Scalar(_) => { panic!("Expected an array value") @@ -291,10 +291,10 @@ mod tests { .expect("failed to convert result to a Float32Array"); assert_eq!(floats.len(), 4); - assert_eq!(floats.value(0), 3.0); - assert_eq!(floats.value(1), 2.0); - assert_eq!(floats.value(2), 4.0); - assert_eq!(floats.value(3), 4.0); + assert!((floats.value(0) - 3.0).abs() < f32::EPSILON); + assert!((floats.value(1) - 2.0).abs() < f32::EPSILON); + assert!((floats.value(2) - 4.0).abs() < f32::EPSILON); + assert!((floats.value(3) - 4.0).abs() < f32::EPSILON); } ColumnarValue::Scalar(_) => { panic!("Expected an array value") 
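The test changes in this commit replace exact floating-point equality with tolerance checks, since log results computed with floating-point arithmetic can differ by a few units in the last place across platforms. A small illustration of the pattern; the closure below is only a stand-in, not DataFusion's `log` implementation:

    fn main() {
        // A log built from floating-point primitives is not guaranteed to be
        // bit-exact, so compare against a tolerance instead of assert_eq!.
        let approx_log = |base: f64, x: f64| x.ln() / base.ln();
        let v = approx_log(10.0, 1000.0);
        assert!((v - 3.0).abs() < 1e-10); // mirrors the updated f64 assertions

        let w = 1000.0_f32.ln() / 10.0_f32.ln();
        assert!((w - 3.0).abs() < 1e-5); // a looser tolerance suits f32 here
    }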
From 6aad19fad370cccf4caa16fcca552bf246f59d93 Mon Sep 17 00:00:00 2001 From: mertak-synnada Date: Mon, 5 Aug 2024 15:59:12 +0300 Subject: [PATCH 216/357] add valid distinct case for aggregate.slt (#11814) --- .../src/replace_distinct_aggregate.rs | 9 +++--- .../sqllogictest/test_files/aggregate.slt | 28 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index f73eeacfbf0e1..c887192f6370a 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -82,10 +82,11 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { for dep in input.schema().functional_dependencies().iter() { // If distinct is exactly the same with a previous GROUP BY, we can // simply remove it: - if dep.source_indices[..field_count] - .iter() - .enumerate() - .all(|(idx, f_idx)| idx == *f_idx) + if dep.source_indices.len() >= field_count + && dep.source_indices[..field_count] + .iter() + .enumerate() + .all(|(idx, f_idx)| idx == *f_idx) { return Ok(Transformed::yes(input.as_ref().clone())); } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index abeeb767b9488..6513258f879e2 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4521,6 +4521,34 @@ false true NULL +# +# Add valid distinct case as aggregation plan test +# + +query TT +EXPLAIN SELECT DISTINCT c3, min(c1) FROM aggregate_test_100 group by c3 limit 5; +---- +logical_plan +01)Limit: skip=0, fetch=5 +02)--Aggregate: groupBy=[[aggregate_test_100.c3, MIN(aggregate_test_100.c1)]], aggr=[[]] +03)----Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[MIN(aggregate_test_100.c1)]] +04)------TableScan: aggregate_test_100 projection=[c1, c3] +physical_plan +01)GlobalLimitExec: skip=0, fetch=5 +02)--CoalescePartitionsExec +03)----LocalLimitExec: fetch=5 +04)------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3, MIN(aggregate_test_100.c1)@1 as MIN(aggregate_test_100.c1)], aggr=[], lim=[5] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([c3@0, MIN(aggregate_test_100.c1)@1], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[c3@0 as c3, MIN(aggregate_test_100.c1)@1 as MIN(aggregate_test_100.c1)], aggr=[], lim=[5] +08)--------------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3], aggr=[MIN(aggregate_test_100.c1)] +09)----------------CoalesceBatchesExec: target_batch_size=8192 +10)------------------RepartitionExec: partitioning=Hash([c3@0], 4), input_partitions=4 +11)--------------------AggregateExec: mode=Partial, gby=[c3@1 as c3], aggr=[MIN(aggregate_test_100.c1)] +12)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true + + # # Push limit into distinct group-by aggregation tests # From 45d85b1d18ce33e2b42e8d3a5087addd8399a116 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Mon, 5 Aug 2024 10:49:46 -0400 Subject: [PATCH 217/357] Change name of MAX/MIN udaf to lowercase max/min (#11795) * Only row 8 diff * MAX -> max, MIN -> min * Updating tests * Removed aliases --- datafusion/core/src/dataframe/mod.rs | 2 +- 
datafusion/core/src/execution/context/mod.rs | 6 +- datafusion/core/src/lib.rs | 4 +- .../core/tests/custom_sources_cases/mod.rs | 4 +- .../core/tests/expr_api/parse_sql_expr.rs | 6 +- datafusion/core/tests/sql/explain_analyze.rs | 6 +- datafusion/expr/src/expr_rewriter/order_by.rs | 12 +- datafusion/expr/src/test/function_stub.rs | 12 +- datafusion/functions-aggregate/src/min_max.rs | 12 +- .../src/analyzer/count_wildcard_rule.rs | 2 +- .../optimizer/src/optimize_projections/mod.rs | 32 +- datafusion/optimizer/src/push_down_limit.rs | 6 +- .../optimizer/src/scalar_subquery_to_join.rs | 156 ++++---- .../simplify_expressions/simplify_exprs.rs | 2 +- .../src/single_distinct_to_groupby.rs | 22 +- datafusion/physical-expr/src/aggregate/mod.rs | 1 - .../src/aggregate/moving_min_max.rs | 335 ------------------ datafusion/sql/tests/cases/plan_to_sql.rs | 4 +- datafusion/sql/tests/sql_integration.rs | 312 ++++++++-------- .../sqllogictest/test_files/aggregate.slt | 18 +- .../test_files/aggregates_topk.slt | 66 ++-- datafusion/sqllogictest/test_files/expr.slt | 8 +- .../sqllogictest/test_files/group_by.slt | 44 +-- .../sqllogictest/test_files/tpch/q15.slt.part | 12 +- .../sqllogictest/test_files/tpch/q2.slt.part | 16 +- datafusion/sqllogictest/test_files/union.slt | 8 +- datafusion/sqllogictest/test_files/update.slt | 4 +- datafusion/sqllogictest/test_files/window.slt | 118 +++--- .../tests/cases/consumer_integration.rs | 2 +- .../library-user-guide/using-the-sql-api.md | 4 +- 30 files changed, 446 insertions(+), 790 deletions(-) delete mode 100644 datafusion/physical-expr/src/aggregate/moving_min_max.rs diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 6ec44b33f89e7..c4c5a4aa08342 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -2060,7 +2060,7 @@ mod tests { assert_batches_sorted_eq!( ["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+", - "| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |", + "| c1 | min(aggregate_test_100.c12) | max(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |", "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+", "| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |", "| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |", diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 24704bc794c28..e6bb1483e2565 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -159,7 +159,7 @@ where /// assert_batches_eq!( /// &[ /// "+---+----------------+", -/// "| a | MIN(?table?.b) |", +/// "| a | min(?table?.b) |", /// "+---+----------------+", /// "| 1 | 2 |", /// "+---+----------------+", @@ -182,14 +182,14 @@ where /// let mut ctx = SessionContext::new(); /// ctx.register_csv("example", "tests/data/example.csv", 
CsvReadOptions::new()).await?; /// let results = ctx -/// .sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100") +/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") /// .await? /// .collect() /// .await?; /// assert_batches_eq!( /// &[ /// "+---+----------------+", -/// "| a | MIN(example.b) |", +/// "| a | min(example.b) |", /// "+---+----------------+", /// "| 1 | 2 |", /// "+---+----------------+", diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 3bb0636652c08..d4b82f288bdd3 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -76,7 +76,7 @@ //! //! let expected = vec![ //! "+---+----------------+", -//! "| a | MIN(?table?.b) |", +//! "| a | min(?table?.b) |", //! "+---+----------------+", //! "| 1 | 2 |", //! "+---+----------------+" @@ -114,7 +114,7 @@ //! //! let expected = vec![ //! "+---+----------------+", -//! "| a | MIN(example.b) |", +//! "| a | min(example.b) |", //! "+---+----------------+", //! "| 1 | 2 |", //! "+---+----------------+" diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index 673fafe55b533..c12dd4e1b0eee 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -284,8 +284,8 @@ async fn optimizers_catch_all_statistics() { let expected = RecordBatch::try_new( Arc::new(Schema::new(vec![ Field::new("count(*)", DataType::Int64, false), - Field::new("MIN(test.c1)", DataType::Int32, false), - Field::new("MAX(test.c1)", DataType::Int32, false), + Field::new("min(test.c1)", DataType::Int32, false), + Field::new("max(test.c1)", DataType::Int32, false), ])), vec![ Arc::new(Int64Array::from(vec![4])), diff --git a/datafusion/core/tests/expr_api/parse_sql_expr.rs b/datafusion/core/tests/expr_api/parse_sql_expr.rs index a3defceee247c..cc049f0004d97 100644 --- a/datafusion/core/tests/expr_api/parse_sql_expr.rs +++ b/datafusion/core/tests/expr_api/parse_sql_expr.rs @@ -49,9 +49,9 @@ async fn round_trip_parse_sql_expr() -> Result<()> { "((a = 10) AND b NOT IN (20, 30))", "sum(a)", "(sum(a) + 1)", - "(MIN(a) + MAX(b))", - "(MIN(a) + (MAX(b) * sum(c)))", - "(MIN(a) + ((MAX(b) * sum(c)) / 10))", + "(min(a) + max(b))", + "(min(a) + (max(b) * sum(c)))", + "(min(a) + ((max(b) * sum(c)) / 10))", ]; for test in tests { diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 07be00fc35159..4c1f5efaf9899 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -615,11 +615,11 @@ async fn test_physical_plan_display_indent() { "GlobalLimitExec: skip=0, fetch=10", " SortPreservingMergeExec: [the_min@2 DESC], fetch=10", " SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]", - " ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)@2 as the_min]", - " AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]", + " ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]", + " AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", " CoalesceBatchesExec: target_batch_size=4096", " RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000", - " AggregateExec: mode=Partial, 
gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]", + " AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", " CoalesceBatchesExec: target_batch_size=4096", " FilterExec: c12@1 < 10", " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index 2efdcae1a790c..3d79caa21fde3 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -25,7 +25,7 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; /// Rewrite sort on aggregate expressions to sort on the column of aggregate output -/// For example, `max(x)` is written to `col("MAX(x)")` +/// For example, `max(x)` is written to `col("max(x)")` pub fn rewrite_sort_cols_by_aggs( exprs: impl IntoIterator>, plan: &LogicalPlan, @@ -108,7 +108,7 @@ fn rewrite_in_terms_of_projection( }; // expr is an actual expr like min(t.c2), but we are looking - // for a column with the same "MIN(C2)", so translate there + // for a column with the same "min(C2)", so translate there let name = normalized_expr.display_name()?; let search_col = Expr::Column(Column { @@ -237,15 +237,15 @@ mod test { expected: sort(col("c1")), }, TestCase { - desc: r#"min(c2) --> "MIN(c2)" -- (column *named* "min(t.c2)"!)"#, + desc: r#"min(c2) --> "min(c2)" -- (column *named* "min(t.c2)"!)"#, input: sort(min(col("c2"))), - expected: sort(col("MIN(t.c2)")), + expected: sort(col("min(t.c2)")), }, TestCase { - desc: r#"c1 + min(c2) --> "c1 + MIN(c2)" -- (column *named* "min(t.c2)"!)"#, + desc: r#"c1 + min(c2) --> "c1 + min(c2)" -- (column *named* "min(t.c2)"!)"#, input: sort(col("c1") + min(col("c2"))), // should be "c1" not t.c1 - expected: sort(col("c1") + col("MIN(t.c2)")), + expected: sort(col("c1") + col("min(t.c2)")), }, TestCase { desc: r#"avg(c3) --> "avg(t.c3)" as average (column *named* "avg(t.c3)", aliased)"#, diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index 72b73ccee44fb..22af84db3f86a 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -305,7 +305,6 @@ pub fn min(expr: Expr) -> Expr { /// Testing stub implementation of Min aggregate pub struct Min { signature: Signature, - aliases: Vec, } impl std::fmt::Debug for Min { @@ -326,7 +325,6 @@ impl Default for Min { impl Min { pub fn new() -> Self { Self { - aliases: vec!["min".to_string()], signature: Signature::variadic_any(Volatility::Immutable), } } @@ -338,7 +336,7 @@ impl AggregateUDFImpl for Min { } fn name(&self) -> &str { - "MIN" + "min" } fn signature(&self) -> &Signature { @@ -358,7 +356,7 @@ impl AggregateUDFImpl for Min { } fn aliases(&self) -> &[String] { - &self.aliases + &[] } fn create_groups_accumulator( @@ -392,7 +390,6 @@ pub fn max(expr: Expr) -> Expr { /// Testing stub implementation of MAX aggregate pub struct Max { signature: Signature, - aliases: Vec, } impl std::fmt::Debug for Max { @@ -413,7 +410,6 @@ impl Default for Max { impl Max { pub fn new() -> Self { Self { - aliases: vec!["max".to_string()], signature: Signature::variadic_any(Volatility::Immutable), } } @@ -425,7 +421,7 @@ impl AggregateUDFImpl for Max { } fn name(&self) -> &str { - "MAX" + "max" } fn signature(&self) -> &Signature { @@ -445,7 +441,7 @@ impl AggregateUDFImpl for Max { } fn 
aliases(&self) -> &[String] { - &self.aliases + &[] } fn create_groups_accumulator( diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 4d743983411dc..18028e358b211 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -85,14 +85,12 @@ fn get_min_max_result_type(input_types: &[DataType]) -> Result> { // MAX aggregate UDF #[derive(Debug)] pub struct Max { - aliases: Vec, signature: Signature, } impl Max { pub fn new() -> Self { Self { - aliases: vec!["max".to_owned()], signature: Signature::user_defined(Volatility::Immutable), } } @@ -146,7 +144,7 @@ impl AggregateUDFImpl for Max { } fn name(&self) -> &str { - "MAX" + "max" } fn signature(&self) -> &Signature { @@ -162,7 +160,7 @@ impl AggregateUDFImpl for Max { } fn aliases(&self) -> &[String] { - &self.aliases + &[] } fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { @@ -891,14 +889,12 @@ impl Accumulator for SlidingMaxAccumulator { #[derive(Debug)] pub struct Min { signature: Signature, - aliases: Vec, } impl Min { pub fn new() -> Self { Self { signature: Signature::user_defined(Volatility::Immutable), - aliases: vec!["min".to_owned()], } } } @@ -915,7 +911,7 @@ impl AggregateUDFImpl for Min { } fn name(&self) -> &str { - "MIN" + "min" } fn signature(&self) -> &Signature { @@ -931,7 +927,7 @@ impl AggregateUDFImpl for Min { } fn aliases(&self) -> &[String] { - &self.aliases + &[] } fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index e2da6c66abc4c..8ff00917dcb13 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -271,7 +271,7 @@ mod tests { .build()?; let expected = "Projection: count(Int64(1)) AS count(*) [count(*):Int64]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(count(Int64(1))) AS MAX(count(*))]] [MAX(count(*)):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[max(count(Int64(1))) AS max(count(*))]] [max(count(*)):Int64;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_plan_eq(plan, expected) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index a307d0ae0a0b3..d014b9149aabf 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -1361,7 +1361,7 @@ mod tests { .aggregate(Vec::::new(), vec![max(col("b"))])? .build()?; - let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\ + let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\ \n TableScan: test projection=[b]"; assert_optimized_plan_equal(plan, expected) @@ -1375,7 +1375,7 @@ mod tests { .aggregate(vec![col("c")], vec![max(col("b"))])? .build()?; - let expected = "Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.b)]]\ + let expected = "Aggregate: groupBy=[[test.c]], aggr=[[max(test.b)]]\ \n TableScan: test projection=[b, c]"; assert_optimized_plan_equal(plan, expected) @@ -1390,7 +1390,7 @@ mod tests { .aggregate(vec![col("c")], vec![max(col("b"))])? .build()?; - let expected = "Aggregate: groupBy=[[a.c]], aggr=[[MAX(a.b)]]\ + let expected = "Aggregate: groupBy=[[a.c]], aggr=[[max(a.b)]]\ \n SubqueryAlias: a\ \n TableScan: test projection=[b, c]"; @@ -1406,7 +1406,7 @@ mod tests { .aggregate(Vec::::new(), vec![max(col("b"))])? 
.build()?; - let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\ + let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\ \n Projection: test.b\ \n Filter: test.c > Int32(1)\ \n TableScan: test projection=[b, c]"; @@ -1422,7 +1422,7 @@ mod tests { // "tag.one", not a column named "one" in a table named "tag"): // // Projection: tag.one - // Aggregate: groupBy=[], aggr=[MAX("tag.one") AS "tag.one"] + // Aggregate: groupBy=[], aggr=[max("tag.one") AS "tag.one"] // TableScan let plan = table_scan(Some("m4"), &schema, None)? .aggregate( @@ -1433,7 +1433,7 @@ mod tests { .build()?; let expected = "\ - Aggregate: groupBy=[[]], aggr=[[MAX(m4.tag.one) AS tag.one]]\ + Aggregate: groupBy=[[]], aggr=[[max(m4.tag.one) AS tag.one]]\ \n TableScan: m4 projection=[tag.one]"; assert_optimized_plan_equal(plan, expected) @@ -1768,11 +1768,11 @@ mod tests { .aggregate(vec![col("c")], vec![max(col("a"))])? .build()?; - assert_fields_eq(&plan, vec!["c", "MAX(test.a)"]); + assert_fields_eq(&plan, vec!["c", "max(test.a)"]); let plan = optimize(plan).expect("failed to optimize plan"); let expected = "\ - Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.a)]]\ + Aggregate: groupBy=[[test.c]], aggr=[[max(test.a)]]\ \n Filter: test.c > Int32(1)\ \n Projection: test.c, test.a\ \n TableScan: test projection=[a, c]"; @@ -1862,14 +1862,14 @@ mod tests { let plan = LogicalPlanBuilder::from(table_scan) .aggregate(vec![col("a"), col("c")], vec![max(col("b")), min(col("b"))])? .filter(col("c").gt(lit(1)))? - .project(vec![col("c"), col("a"), col("MAX(test.b)")])? + .project(vec![col("c"), col("a"), col("max(test.b)")])? .build()?; - assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]); + assert_fields_eq(&plan, vec!["c", "a", "max(test.b)"]); - let expected = "Projection: test.c, test.a, MAX(test.b)\ + let expected = "Projection: test.c, test.a, max(test.b)\ \n Filter: test.c > Int32(1)\ - \n Aggregate: groupBy=[[test.a, test.c]], aggr=[[MAX(test.b)]]\ + \n Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b)]]\ \n TableScan: test projection=[a, b, c]"; assert_optimized_plan_equal(plan, expected) @@ -1937,10 +1937,10 @@ mod tests { .project(vec![col1, col2])? 
.build()?; - let expected = "Projection: MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n Projection: test.b, MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + let expected = "Projection: max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + \n Projection: test.b, max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: test projection=[a, b]"; assert_optimized_plan_equal(plan, expected) diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index d7da3871ee89b..612aac1d152d7 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -375,7 +375,7 @@ mod test { // Limit should *not* push down aggregate node let expected = "Limit: skip=0, fetch=1000\ - \n Aggregate: groupBy=[[test.a]], aggr=[[MAX(test.b)]]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[max(test.b)]]\ \n TableScan: test"; assert_optimized_plan_equal(plan, expected) @@ -447,7 +447,7 @@ mod test { // Limit should use deeper LIMIT 1000, but Limit 10 shouldn't push down aggregation let expected = "Limit: skip=0, fetch=10\ - \n Aggregate: groupBy=[[test.a]], aggr=[[MAX(test.b)]]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[max(test.b)]]\ \n Limit: skip=0, fetch=1000\ \n TableScan: test, fetch=1000"; @@ -548,7 +548,7 @@ mod test { // Limit should *not* push down aggregate node let expected = "Limit: skip=10, fetch=1000\ - \n Aggregate: groupBy=[[test.a]], aggr=[[MAX(test.b)]]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[max(test.b)]]\ \n TableScan: test"; assert_optimized_plan_equal(plan, expected) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index fbec675f6fc48..3c66da21aff65 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -422,17 +422,17 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: Int32(1) < __scalar_sq_1.MAX(orders.o_custkey) AND Int32(1) < __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: __scalar_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: __scalar_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: Int32(1) < __scalar_sq_1.max(orders.o_custkey) AND Int32(1) < __scalar_sq_2.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N, max(orders.o_custkey):Int64;N, 
o_custkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( vec![Arc::new(ScalarSubqueryToJoin::new())], @@ -519,12 +519,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n Filter: orders.o_orderkey = Int32(1) [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ \n TableScan: orders 
[o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]";
@@ -557,12 +557,12 @@ mod tests {
 // it will optimize, but fail for the same reason the unoptimized query would
 let expected = "Projection: customer.c_custkey [c_custkey:Int64]\
- \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\
- \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\
+ \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\
+ \n Left Join: [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\
 \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\
- \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\
- \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\
- \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\
+ \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N]\
+ \n Projection: max(orders.o_custkey) [max(orders.o_custkey):Int64;N]\
+ \n Aggregate: groupBy=[[]], aggr=[[max(orders.o_custkey)]] [max(orders.o_custkey):Int64;N]\
 \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]";
 assert_multi_rules_optimized_plan_eq_display_indent(
 vec![Arc::new(ScalarSubqueryToJoin::new())],
@@ -589,12 +589,12 @@ mod tests {
 .build()?;
 let expected = "Projection: customer.c_custkey [c_custkey:Int64]\
- \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\
- \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\
+ \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\
+ \n Left Join: [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\
 \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\
- \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\
- \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\
- \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\
+ \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N]\
+ \n Projection: max(orders.o_custkey) [max(orders.o_custkey):Int64;N]\
+ \n Aggregate: groupBy=[[]], aggr=[[max(orders.o_custkey)]] [max(orders.o_custkey):Int64;N]\
 \n Filter: orders.o_custkey = orders.o_custkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\
 \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]";
@@ -719,7 +719,7 @@ mod tests {
 .eq(col("orders.o_custkey")),
 )?
 .aggregate(Vec::<Expr>::new(), vec![max(col("orders.o_custkey"))])?
- .project(vec![col("MAX(orders.o_custkey)").add(lit(1))])?
+ .project(vec![col("max(orders.o_custkey)").add(lit(1))])?
.build()?, ); @@ -729,12 +729,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) + Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) + Int32(1) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey) + Int32(1), orders.o_custkey [MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey) + Int32(1), orders.o_custkey [max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -795,12 +795,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey >= __scalar_sq_1.MAX(orders.o_custkey) AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey >= __scalar_sq_1.max(orders.o_custkey) AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -834,12 +834,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) AND customer.c_custkey = 
Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -874,12 +874,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) OR customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) OR customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -907,12 +907,12 @@ mod tests { .build()?; let expected = "Projection: test.c [c:UInt32]\ - \n Filter: test.c < __scalar_sq_1.MIN(sq.c) [a:UInt32, b:UInt32, c:UInt32, MIN(sq.c):UInt32;N, a:UInt32;N]\ - \n Left Join: Filter: test.a = __scalar_sq_1.a [a:UInt32, b:UInt32, c:UInt32, MIN(sq.c):UInt32;N, a:UInt32;N]\ + \n Filter: test.c < __scalar_sq_1.min(sq.c) [a:UInt32, b:UInt32, c:UInt32, min(sq.c):UInt32;N, a:UInt32;N]\ + \n Left Join: Filter: test.a = __scalar_sq_1.a [a:UInt32, b:UInt32, 
c:UInt32, min(sq.c):UInt32;N, a:UInt32;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __scalar_sq_1 [MIN(sq.c):UInt32;N, a:UInt32]\ - \n Projection: MIN(sq.c), sq.a [MIN(sq.c):UInt32;N, a:UInt32]\ - \n Aggregate: groupBy=[[sq.a]], aggr=[[MIN(sq.c)]] [a:UInt32, MIN(sq.c):UInt32;N]\ + \n SubqueryAlias: __scalar_sq_1 [min(sq.c):UInt32;N, a:UInt32]\ + \n Projection: min(sq.c), sq.a [min(sq.c):UInt32;N, a:UInt32]\ + \n Aggregate: groupBy=[[sq.a]], aggr=[[min(sq.c)]] [a:UInt32, min(sq.c):UInt32;N]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -939,12 +939,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey < __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Filter: customer.c_custkey < __scalar_sq_1.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ - \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N]\ + \n Projection: max(orders.o_custkey) [max(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[max(orders.o_custkey)]] [max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -970,12 +970,12 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ - \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey):Int64;N]\ + \n Projection: max(orders.o_custkey) [max(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[max(orders.o_custkey)]] [max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -1022,17 +1022,17 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.MIN(orders.o_custkey) AND __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, 
o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ - \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.min(orders.o_custkey) AND __scalar_sq_2.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N, o_custkey:Int64;N, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N, o_custkey:Int64;N, max(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MIN(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MIN(orders.o_custkey), orders.o_custkey [MIN(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MIN(orders.o_custkey)]] [o_custkey:Int64, MIN(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [min(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: min(orders.o_custkey), orders.o_custkey [min(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[min(orders.o_custkey)]] [o_custkey:Int64, min(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ - \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: max(orders.o_custkey), orders.o_custkey [max(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( @@ -1071,17 +1071,17 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.MIN(orders.o_custkey) AND __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ - \n Left Join: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ - \n Left Join: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N]\ + \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.min(orders.o_custkey) AND __scalar_sq_2.max(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N, max(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N, max(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, min(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [MIN(orders.o_custkey):Int64;N]\ - \n Projection: MIN(orders.o_custkey) [MIN(orders.o_custkey):Int64;N]\ - \n Aggregate: groupBy=[[]], 
aggr=[[MIN(orders.o_custkey)]] [MIN(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [min(orders.o_custkey):Int64;N]\ + \n Projection: min(orders.o_custkey) [min(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[min(orders.o_custkey)]] [min(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N]\ - \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [max(orders.o_custkey):Int64;N]\ + \n Projection: max(orders.o_custkey) [max(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[max(orders.o_custkey)]] [max(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index cb9ec3024d939..e13ce382a3e0b 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -401,7 +401,7 @@ mod tests { .build()?; let expected = "\ - Aggregate: groupBy=[[test.a, test.c]], aggr=[[MAX(test.b) AS MAX(test.b = Boolean(true)), MIN(test.b)]]\ + Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b) AS max(test.b = Boolean(true)), min(test.b)]]\ \n Projection: test.a, test.c, test.b\ \n TableScan: test"; diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 9a0fab14d3e0e..5b43957924474 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -324,7 +324,7 @@ mod tests { // Do nothing let expected = - "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]] [MAX(test.b):UInt32;N]\ + "Aggregate: groupBy=[[]], aggr=[[max(test.b)]] [max(test.b):UInt32;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(plan, expected) @@ -467,8 +467,8 @@ mod tests { )? .build()?; // Should work - let expected = "Projection: test.a, count(alias1) AS count(DISTINCT test.b), MAX(alias1) AS MAX(DISTINCT test.b) [a:UInt32, count(DISTINCT test.b):Int64, MAX(DISTINCT test.b):UInt32;N]\ - \n Aggregate: groupBy=[[test.a]], aggr=[[count(alias1), MAX(alias1)]] [a:UInt32, count(alias1):Int64, MAX(alias1):UInt32;N]\ + let expected = "Projection: test.a, count(alias1) AS count(DISTINCT test.b), max(alias1) AS max(DISTINCT test.b) [a:UInt32, count(DISTINCT test.b):Int64, max(DISTINCT test.b):UInt32;N]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[count(alias1), max(alias1)]] [a:UInt32, count(alias1):Int64, max(alias1):UInt32;N]\ \n Aggregate: groupBy=[[test.a, test.b AS alias1]], aggr=[[]] [a:UInt32, alias1:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; @@ -525,8 +525,8 @@ mod tests { )? 
.build()?; // Should work - let expected = "Projection: test.a, sum(alias2) AS sum(test.c), count(alias1) AS count(DISTINCT test.b), MAX(alias1) AS MAX(DISTINCT test.b) [a:UInt32, sum(test.c):UInt64;N, count(DISTINCT test.b):Int64, MAX(DISTINCT test.b):UInt32;N]\ - \n Aggregate: groupBy=[[test.a]], aggr=[[sum(alias2), count(alias1), MAX(alias1)]] [a:UInt32, sum(alias2):UInt64;N, count(alias1):Int64, MAX(alias1):UInt32;N]\ + let expected = "Projection: test.a, sum(alias2) AS sum(test.c), count(alias1) AS count(DISTINCT test.b), max(alias1) AS max(DISTINCT test.b) [a:UInt32, sum(test.c):UInt64;N, count(DISTINCT test.b):Int64, max(DISTINCT test.b):UInt32;N]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[sum(alias2), count(alias1), max(alias1)]] [a:UInt32, sum(alias2):UInt64;N, count(alias1):Int64, max(alias1):UInt32;N]\ \n Aggregate: groupBy=[[test.a, test.b AS alias1]], aggr=[[sum(test.c) AS alias2]] [a:UInt32, alias1:UInt32, alias2:UInt64;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; @@ -544,9 +544,9 @@ mod tests { )? .build()?; // Should work - let expected = "Projection: test.a, sum(alias2) AS sum(test.c), MAX(alias3) AS MAX(test.c), count(alias1) AS count(DISTINCT test.b) [a:UInt32, sum(test.c):UInt64;N, MAX(test.c):UInt32;N, count(DISTINCT test.b):Int64]\ - \n Aggregate: groupBy=[[test.a]], aggr=[[sum(alias2), MAX(alias3), count(alias1)]] [a:UInt32, sum(alias2):UInt64;N, MAX(alias3):UInt32;N, count(alias1):Int64]\ - \n Aggregate: groupBy=[[test.a, test.b AS alias1]], aggr=[[sum(test.c) AS alias2, MAX(test.c) AS alias3]] [a:UInt32, alias1:UInt32, alias2:UInt64;N, alias3:UInt32;N]\ + let expected = "Projection: test.a, sum(alias2) AS sum(test.c), max(alias3) AS max(test.c), count(alias1) AS count(DISTINCT test.b) [a:UInt32, sum(test.c):UInt64;N, max(test.c):UInt32;N, count(DISTINCT test.b):Int64]\ + \n Aggregate: groupBy=[[test.a]], aggr=[[sum(alias2), max(alias3), count(alias1)]] [a:UInt32, sum(alias2):UInt64;N, max(alias3):UInt32;N, count(alias1):Int64]\ + \n Aggregate: groupBy=[[test.a, test.b AS alias1]], aggr=[[sum(test.c) AS alias2, max(test.c) AS alias3]] [a:UInt32, alias1:UInt32, alias2:UInt64;N, alias3:UInt32;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(plan, expected) @@ -563,9 +563,9 @@ mod tests { )? 
.build()?; // Should work - let expected = "Projection: test.c, MIN(alias2) AS MIN(test.a), count(alias1) AS count(DISTINCT test.b) [c:UInt32, MIN(test.a):UInt32;N, count(DISTINCT test.b):Int64]\ - \n Aggregate: groupBy=[[test.c]], aggr=[[MIN(alias2), count(alias1)]] [c:UInt32, MIN(alias2):UInt32;N, count(alias1):Int64]\ - \n Aggregate: groupBy=[[test.c, test.b AS alias1]], aggr=[[MIN(test.a) AS alias2]] [c:UInt32, alias1:UInt32, alias2:UInt32;N]\ + let expected = "Projection: test.c, min(alias2) AS min(test.a), count(alias1) AS count(DISTINCT test.b) [c:UInt32, min(test.a):UInt32;N, count(DISTINCT test.b):Int64]\ + \n Aggregate: groupBy=[[test.c]], aggr=[[min(alias2), count(alias1)]] [c:UInt32, min(alias2):UInt32;N, count(alias1):Int64]\ + \n Aggregate: groupBy=[[test.c, test.b AS alias1]], aggr=[[min(test.a) AS alias2]] [c:UInt32, alias1:UInt32, alias2:UInt32;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(plan, expected) diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs index 0760986a87c6d..b477a815bf800 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -18,7 +18,6 @@ pub(crate) mod groups_accumulator; pub(crate) mod stats; -pub mod moving_min_max; pub mod utils { pub use datafusion_physical_expr_common::aggregate::utils::{ adjust_output_array, down_cast_any_ref, get_accum_scalar_values_as_arrays, diff --git a/datafusion/physical-expr/src/aggregate/moving_min_max.rs b/datafusion/physical-expr/src/aggregate/moving_min_max.rs deleted file mode 100644 index c4fb076797475..0000000000000 --- a/datafusion/physical-expr/src/aggregate/moving_min_max.rs +++ /dev/null @@ -1,335 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// The implementation is taken from https://github.com/spebern/moving_min_max/blob/master/src/lib.rs. - -//! Keep track of the minimum or maximum value in a sliding window. -//! -//! `moving min max` provides one data structure for keeping track of the -//! minimum value and one for keeping track of the maximum value in a sliding -//! window. -//! -//! Each element is stored with the current min/max. One stack to push and another one for pop. If pop stack is empty, -//! push to this stack all elements popped from first stack while updating their current min/max. Now pop from -//! the second stack (MovingMin/Max struct works as a queue). To find the minimum element of the queue, -//! look at the smallest/largest two elements of the individual stacks, then take the minimum of those two values. -//! -//! The complexity of the operations are -//! - O(1) for getting the minimum/maximum -//! - O(1) for push -//! 
- amortized O(1) for pop
-
-/// ```
-/// # use datafusion_physical_expr::aggregate::moving_min_max::MovingMin;
-/// let mut moving_min = MovingMin::<i32>::new();
-/// moving_min.push(2);
-/// moving_min.push(1);
-/// moving_min.push(3);
-///
-/// assert_eq!(moving_min.min(), Some(&1));
-/// assert_eq!(moving_min.pop(), Some(2));
-///
-/// assert_eq!(moving_min.min(), Some(&1));
-/// assert_eq!(moving_min.pop(), Some(1));
-///
-/// assert_eq!(moving_min.min(), Some(&3));
-/// assert_eq!(moving_min.pop(), Some(3));
-///
-/// assert_eq!(moving_min.min(), None);
-/// assert_eq!(moving_min.pop(), None);
-/// ```
-#[derive(Debug)]
-pub struct MovingMin<T: Clone + PartialOrd> {
- push_stack: Vec<(T, T)>,
- pop_stack: Vec<(T, T)>,
-}
-
-impl<T: Clone + PartialOrd> Default for MovingMin<T> {
- fn default() -> Self {
- Self {
- push_stack: Vec::new(),
- pop_stack: Vec::new(),
- }
- }
-}
-
-impl<T: Clone + PartialOrd> MovingMin<T> {
- /// Creates a new `MovingMin` to keep track of the minimum in a sliding
- /// window.
- #[inline]
- pub fn new() -> Self {
- Self::default()
- }
-
- /// Creates a new `MovingMin` to keep track of the minimum in a sliding
- /// window with `capacity` allocated slots.
- #[inline]
- pub fn with_capacity(capacity: usize) -> Self {
- Self {
- push_stack: Vec::with_capacity(capacity),
- pop_stack: Vec::with_capacity(capacity),
- }
- }
-
- /// Returns the minimum of the sliding window or `None` if the window is
- /// empty.
- #[inline]
- pub fn min(&self) -> Option<&T> {
- match (self.push_stack.last(), self.pop_stack.last()) {
- (None, None) => None,
- (Some((_, min)), None) => Some(min),
- (None, Some((_, min))) => Some(min),
- (Some((_, a)), Some((_, b))) => Some(if a < b { a } else { b }),
- }
- }
-
- /// Pushes a new element into the sliding window.
- #[inline]
- pub fn push(&mut self, val: T) {
- self.push_stack.push(match self.push_stack.last() {
- Some((_, min)) => {
- if val > *min {
- (val, min.clone())
- } else {
- (val.clone(), val)
- }
- }
- None => (val.clone(), val),
- });
- }
-
- /// Removes and returns the last value of the sliding window.
- #[inline]
- pub fn pop(&mut self) -> Option<T> {
- if self.pop_stack.is_empty() {
- match self.push_stack.pop() {
- Some((val, _)) => {
- let mut last = (val.clone(), val);
- self.pop_stack.push(last.clone());
- while let Some((val, _)) = self.push_stack.pop() {
- let min = if last.1 < val {
- last.1.clone()
- } else {
- val.clone()
- };
- last = (val.clone(), min);
- self.pop_stack.push(last.clone());
- }
- }
- None => return None,
- }
- }
- self.pop_stack.pop().map(|(val, _)| val)
- }
-
- /// Returns the number of elements stored in the sliding window.
- #[inline]
- pub fn len(&self) -> usize {
- self.push_stack.len() + self.pop_stack.len()
- }
-
- /// Returns `true` if the moving window contains no elements.
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.len() == 0
- }
-}
-/// ```
-/// # use datafusion_physical_expr::aggregate::moving_min_max::MovingMax;
-/// let mut moving_max = MovingMax::<i32>::new();
-/// moving_max.push(2);
-/// moving_max.push(3);
-/// moving_max.push(1);
-///
-/// assert_eq!(moving_max.max(), Some(&3));
-/// assert_eq!(moving_max.pop(), Some(2));
-///
-/// assert_eq!(moving_max.max(), Some(&3));
-/// assert_eq!(moving_max.pop(), Some(3));
-///
-/// assert_eq!(moving_max.max(), Some(&1));
-/// assert_eq!(moving_max.pop(), Some(1));
-///
-/// assert_eq!(moving_max.max(), None);
-/// assert_eq!(moving_max.pop(), None);
-/// ```
-#[derive(Debug)]
-pub struct MovingMax<T: Clone + PartialOrd> {
- push_stack: Vec<(T, T)>,
- pop_stack: Vec<(T, T)>,
-}
-
-impl<T: Clone + PartialOrd> Default for MovingMax<T> {
- fn default() -> Self {
- Self {
- push_stack: Vec::new(),
- pop_stack: Vec::new(),
- }
- }
-}
-
-impl<T: Clone + PartialOrd> MovingMax<T> {
- /// Creates a new `MovingMax` to keep track of the maximum in a sliding window.
- #[inline]
- pub fn new() -> Self {
- Self::default()
- }
-
- /// Creates a new `MovingMax` to keep track of the maximum in a sliding window with
- /// `capacity` allocated slots.
- #[inline]
- pub fn with_capacity(capacity: usize) -> Self {
- Self {
- push_stack: Vec::with_capacity(capacity),
- pop_stack: Vec::with_capacity(capacity),
- }
- }
-
- /// Returns the maximum of the sliding window or `None` if the window is empty.
- #[inline]
- pub fn max(&self) -> Option<&T> {
- match (self.push_stack.last(), self.pop_stack.last()) {
- (None, None) => None,
- (Some((_, max)), None) => Some(max),
- (None, Some((_, max))) => Some(max),
- (Some((_, a)), Some((_, b))) => Some(if a > b { a } else { b }),
- }
- }
-
- /// Pushes a new element into the sliding window.
- #[inline]
- pub fn push(&mut self, val: T) {
- self.push_stack.push(match self.push_stack.last() {
- Some((_, max)) => {
- if val < *max {
- (val, max.clone())
- } else {
- (val.clone(), val)
- }
- }
- None => (val.clone(), val),
- });
- }
-
- /// Removes and returns the last value of the sliding window.
- #[inline]
- pub fn pop(&mut self) -> Option<T> {
- if self.pop_stack.is_empty() {
- match self.push_stack.pop() {
- Some((val, _)) => {
- let mut last = (val.clone(), val);
- self.pop_stack.push(last.clone());
- while let Some((val, _)) = self.push_stack.pop() {
- let max = if last.1 > val {
- last.1.clone()
- } else {
- val.clone()
- };
- last = (val.clone(), max);
- self.pop_stack.push(last.clone());
- }
- }
- None => return None,
- }
- }
- self.pop_stack.pop().map(|(val, _)| val)
- }
-
- /// Returns the number of elements stored in the sliding window.
- #[inline]
- pub fn len(&self) -> usize {
- self.push_stack.len() + self.pop_stack.len()
- }
-
- /// Returns `true` if the moving window contains no elements.
- #[inline]
- pub fn is_empty(&self) -> bool {
- self.len() == 0
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use datafusion_common::Result;
- use rand::Rng;
-
- fn get_random_vec_i32(len: usize) -> Vec<i32> {
- let mut rng = rand::thread_rng();
- let mut input = Vec::with_capacity(len);
- for _i in 0..len {
- input.push(rng.gen_range(0..100));
- }
- input
- }
-
- fn moving_min_i32(len: usize, n_sliding_window: usize) -> Result<()> {
- let data = get_random_vec_i32(len);
- let mut expected = Vec::with_capacity(len);
- let mut moving_min = MovingMin::<i32>::new();
- let mut res = Vec::with_capacity(len);
- for i in 0..len {
- let start = i.saturating_sub(n_sliding_window);
- expected.push(*data[start..i + 1].iter().min().unwrap());
-
- moving_min.push(data[i]);
- if i > n_sliding_window {
- moving_min.pop();
- }
- res.push(*moving_min.min().unwrap());
- }
- assert_eq!(res, expected);
- Ok(())
- }
-
- fn moving_max_i32(len: usize, n_sliding_window: usize) -> Result<()> {
- let data = get_random_vec_i32(len);
- let mut expected = Vec::with_capacity(len);
- let mut moving_max = MovingMax::<i32>::new();
- let mut res = Vec::with_capacity(len);
- for i in 0..len {
- let start = i.saturating_sub(n_sliding_window);
- expected.push(*data[start..i + 1].iter().max().unwrap());
-
- moving_max.push(data[i]);
- if i > n_sliding_window {
- moving_max.pop();
- }
- res.push(*moving_max.max().unwrap());
- }
- assert_eq!(res, expected);
- Ok(())
- }
-
- #[test]
- fn moving_min_tests() -> Result<()> {
- moving_min_i32(100, 10)?;
- moving_min_i32(100, 20)?;
- moving_min_i32(100, 50)?;
- moving_min_i32(100, 100)?;
- Ok(())
- }
-
- #[test]
- fn moving_max_tests() -> Result<()> {
- moving_max_i32(100, 10)?;
- moving_max_i32(100, 20)?;
- moving_max_i32(100, 50)?;
- moving_max_i32(100, 100)?;
- Ok(())
- }
-}
diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs
index b65f976a22511..8f9f1dd78f93c 100644
--- a/datafusion/sql/tests/cases/plan_to_sql.rs
+++ b/datafusion/sql/tests/cases/plan_to_sql.rs
@@ -295,7 +295,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
 sql: "SELECT string_count FROM (
 SELECT
 j1_id,
- MIN(j2_string)
+ min(j2_string)
 FROM
 j1 LEFT OUTER JOIN j2 ON
 j1_id = j2_id
@@ -303,7 +303,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
 j1_id
 ) AS agg (id, string_count)
 ",
- expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, MIN(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#,
+ expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, min(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#,
 parser_dialect: Box::new(GenericDialect {}),
 unparser_dialect: Box::new(UnparserDefaultDialect {}),
 },
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index e61c29f1c80d2..8a5510eb69f3e 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1016,7 +1016,7 @@ fn select_with_having_refers_to_invalid_column() {
 HAVING first_name = 'M'";
 let err = logical_plan(sql).expect_err("query should have failed");
 assert_eq!(
- "Error during planning: HAVING clause references non-aggregate values: Expression person.first_name could not be resolved from available columns: person.id, 
max(person.age)", err.strip_backtrace() ); } @@ -1040,7 +1040,7 @@ fn select_with_having_with_aggregate_not_in_select() { HAVING MAX(age) > 100"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projection references non-aggregate values: Expression person.first_name could not be resolved from available columns: MAX(person.age)", + "Error during planning: Projection references non-aggregate values: Expression person.first_name could not be resolved from available columns: max(person.age)", err.strip_backtrace() ); } @@ -1050,21 +1050,21 @@ fn select_aggregate_with_having_that_reuses_aggregate() { let sql = "SELECT MAX(age) FROM person HAVING MAX(age) < 30"; - let expected = "Projection: MAX(person.age)\ - \n Filter: MAX(person.age) < Int64(30)\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: max(person.age)\ + \n Filter: max(person.age) < Int64(30)\ + \n Aggregate: groupBy=[[]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } #[test] fn select_aggregate_with_having_with_aggregate_not_in_select() { - let sql = "SELECT MAX(age) + let sql = "SELECT max(age) FROM person - HAVING MAX(first_name) > 'M'"; - let expected = "Projection: MAX(person.age)\ - \n Filter: MAX(person.first_name) > Utf8(\"M\")\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(person.age), MAX(person.first_name)]]\ + HAVING max(first_name) > 'M'"; + let expected = "Projection: max(person.age)\ + \n Filter: max(person.first_name) > Utf8(\"M\")\ + \n Aggregate: groupBy=[[]], aggr=[[max(person.age), max(person.first_name)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1087,21 +1087,21 @@ fn select_aggregate_aliased_with_having_referencing_aggregate_by_its_alias() { FROM person HAVING max_age < 30"; // FIXME: add test for having in execution - let expected = "Projection: MAX(person.age) AS max_age\ - \n Filter: MAX(person.age) < Int64(30)\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: max(person.age) AS max_age\ + \n Filter: max(person.age) < Int64(30)\ + \n Aggregate: groupBy=[[]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } #[test] fn select_aggregate_aliased_with_having_that_reuses_aggregate_but_not_by_its_alias() { - let sql = "SELECT MAX(age) as max_age + let sql = "SELECT max(age) as max_age FROM person - HAVING MAX(age) < 30"; - let expected = "Projection: MAX(person.age) AS max_age\ - \n Filter: MAX(person.age) < Int64(30)\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(person.age)]]\ + HAVING max(age) < 30"; + let expected = "Projection: max(person.age) AS max_age\ + \n Filter: max(person.age) < Int64(30)\ + \n Aggregate: groupBy=[[]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1112,23 +1112,23 @@ fn select_aggregate_with_group_by_with_having() { FROM person GROUP BY first_name HAVING first_name = 'M'"; - let expected = "Projection: person.first_name, MAX(person.age)\ + let expected = "Projection: person.first_name, max(person.age)\ \n Filter: person.first_name = Utf8(\"M\")\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } #[test] fn select_aggregate_with_group_by_with_having_and_where() { - let sql = "SELECT first_name, MAX(age) + let sql = "SELECT first_name, max(age) FROM person WHERE id > 5 GROUP BY first_name HAVING MAX(age) < 100"; - 
let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) < Int64(100)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) < Int64(100)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n Filter: person.id > Int64(5)\ \n TableScan: person"; quick_test(sql, expected); @@ -1141,9 +1141,9 @@ fn select_aggregate_with_group_by_with_having_and_where_filtering_on_aggregate_c WHERE id > 5 AND age > 18 GROUP BY first_name HAVING MAX(age) < 100"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) < Int64(100)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) < Int64(100)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n Filter: person.id > Int64(5) AND person.age > Int64(18)\ \n TableScan: person"; quick_test(sql, expected); @@ -1155,9 +1155,9 @@ fn select_aggregate_with_group_by_with_having_using_column_by_alias() { FROM person GROUP BY first_name HAVING MAX(age) > 2 AND fn = 'M'"; - let expected = "Projection: person.first_name AS fn, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(2) AND person.first_name = Utf8(\"M\")\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name AS fn, max(person.age)\ + \n Filter: max(person.age) > Int64(2) AND person.first_name = Utf8(\"M\")\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1169,9 +1169,9 @@ fn select_aggregate_with_group_by_with_having_using_columns_with_and_without_the FROM person GROUP BY first_name HAVING MAX(age) > 2 AND max_age < 5 AND first_name = 'M' AND fn = 'N'"; - let expected = "Projection: person.first_name AS fn, MAX(person.age) AS max_age\ - \n Filter: MAX(person.age) > Int64(2) AND MAX(person.age) < Int64(5) AND person.first_name = Utf8(\"M\") AND person.first_name = Utf8(\"N\")\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name AS fn, max(person.age) AS max_age\ + \n Filter: max(person.age) > Int64(2) AND max(person.age) < Int64(5) AND person.first_name = Utf8(\"M\") AND person.first_name = Utf8(\"N\")\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1182,9 +1182,9 @@ fn select_aggregate_with_group_by_with_having_that_reuses_aggregate() { FROM person GROUP BY first_name HAVING MAX(age) > 100"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(100)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) > Int64(100)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1197,7 +1197,7 @@ fn select_aggregate_with_group_by_with_having_referencing_column_not_in_group_by HAVING MAX(age) > 10 AND last_name = 'M'"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: HAVING clause references non-aggregate values: Expression person.last_name could not be resolved from available columns: 
person.first_name, MAX(person.age)", + "Error during planning: HAVING clause references non-aggregate values: Expression person.last_name could not be resolved from available columns: person.first_name, max(person.age)", err.strip_backtrace() ); } @@ -1208,9 +1208,9 @@ fn select_aggregate_with_group_by_with_having_that_reuses_aggregate_multiple_tim FROM person GROUP BY first_name HAVING MAX(age) > 100 AND MAX(age) < 200"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(100) AND MAX(person.age) < Int64(200)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) > Int64(100) AND max(person.age) < Int64(200)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1221,9 +1221,9 @@ fn select_aggregate_with_group_by_with_having_using_aggregate_not_in_select() { FROM person GROUP BY first_name HAVING MAX(age) > 100 AND MIN(id) < 50"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(100) AND MIN(person.id) < Int64(50)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age), MIN(person.id)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) > Int64(100) AND min(person.id) < Int64(50)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age), min(person.id)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1235,9 +1235,9 @@ fn select_aggregate_aliased_with_group_by_with_having_referencing_aggregate_by_i FROM person GROUP BY first_name HAVING max_age > 100"; - let expected = "Projection: person.first_name, MAX(person.age) AS max_age\ - \n Filter: MAX(person.age) > Int64(100)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age) AS max_age\ + \n Filter: max(person.age) > Int64(100)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1249,9 +1249,9 @@ fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compo FROM person GROUP BY first_name HAVING max_age_plus_one > 100"; - let expected = "Projection: person.first_name, MAX(person.age) + Int64(1) AS max_age_plus_one\ - \n Filter: MAX(person.age) + Int64(1) > Int64(100)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: person.first_name, max(person.age) + Int64(1) AS max_age_plus_one\ + \n Filter: max(person.age) + Int64(1) > Int64(100)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1263,9 +1263,9 @@ fn select_aggregate_with_group_by_with_having_using_derived_column_aggregate_not FROM person GROUP BY first_name HAVING MAX(age) > 100 AND MIN(id - 2) < 50"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(100) AND MIN(person.id - Int64(2)) < Int64(50)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age), MIN(person.id - Int64(2))]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) > Int64(100) AND min(person.id - Int64(2)) < Int64(50)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age), min(person.id - Int64(2))]]\ \n TableScan: 
person"; quick_test(sql, expected); } @@ -1276,9 +1276,9 @@ fn select_aggregate_with_group_by_with_having_using_count_star_not_in_select() { FROM person GROUP BY first_name HAVING MAX(age) > 100 AND count(*) < 50"; - let expected = "Projection: person.first_name, MAX(person.age)\ - \n Filter: MAX(person.age) > Int64(100) AND count(*) < Int64(50)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.age), count(*)]]\ + let expected = "Projection: person.first_name, max(person.age)\ + \n Filter: max(person.age) > Int64(100) AND count(*) < Int64(50)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age), count(*)]]\ \n TableScan: person"; quick_test(sql, expected); } @@ -1321,8 +1321,8 @@ fn select_wildcard_with_groupby() { fn select_simple_aggregate() { quick_test( "SELECT MIN(age) FROM person", - "Projection: MIN(person.age)\ - \n Aggregate: groupBy=[[]], aggr=[[MIN(person.age)]]\ + "Projection: min(person.age)\ + \n Aggregate: groupBy=[[]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1349,7 +1349,7 @@ fn select_simple_aggregate_repeated_aggregate() { let sql = "SELECT MIN(age), MIN(age) FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projections require unique expression names but the expression \"MIN(person.age)\" at position 0 and \"MIN(person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.", + "Error during planning: Projections require unique expression names but the expression \"min(person.age)\" at position 0 and \"min(person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.", err.strip_backtrace() ); } @@ -1358,8 +1358,8 @@ fn select_simple_aggregate_repeated_aggregate() { fn select_simple_aggregate_repeated_aggregate_with_single_alias() { quick_test( "SELECT MIN(age), MIN(age) AS a FROM person", - "Projection: MIN(person.age), MIN(person.age) AS a\ - \n Aggregate: groupBy=[[]], aggr=[[MIN(person.age)]]\ + "Projection: min(person.age), min(person.age) AS a\ + \n Aggregate: groupBy=[[]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1368,8 +1368,8 @@ fn select_simple_aggregate_repeated_aggregate_with_single_alias() { fn select_simple_aggregate_repeated_aggregate_with_unique_aliases() { quick_test( "SELECT MIN(age) AS a, MIN(age) AS b FROM person", - "Projection: MIN(person.age) AS a, MIN(person.age) AS b\ - \n Aggregate: groupBy=[[]], aggr=[[MIN(person.age)]]\ + "Projection: min(person.age) AS a, min(person.age) AS b\ + \n Aggregate: groupBy=[[]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1390,7 +1390,7 @@ fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() { let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projections require unique expression names but the expression \"MIN(person.age) AS a\" at position 0 and \"MIN(person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.", + "Error during planning: Projections require unique expression names but the expression \"min(person.age) AS a\" at position 0 and \"min(person.age) AS a\" at position 1 have the same name. 
Consider aliasing (\"AS\") one of them.", err.strip_backtrace() ); } @@ -1399,8 +1399,8 @@ fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() { fn select_simple_aggregate_with_groupby() { quick_test( "SELECT state, MIN(age), MAX(age) FROM person GROUP BY state", - "Projection: person.state, MIN(person.age), MAX(person.age)\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age), MAX(person.age)]]\ + "Projection: person.state, min(person.age), max(person.age)\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age), max(person.age)]]\ \n TableScan: person", ); } @@ -1409,8 +1409,8 @@ fn select_simple_aggregate_with_groupby() { fn select_simple_aggregate_with_groupby_with_aliases() { quick_test( "SELECT state AS a, MIN(age) AS b FROM person GROUP BY state", - "Projection: person.state AS a, MIN(person.age) AS b\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age)]]\ + "Projection: person.state AS a, min(person.age) AS b\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1420,7 +1420,7 @@ fn select_simple_aggregate_with_groupby_with_aliases_repeated() { let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projections require unique expression names but the expression \"person.state AS a\" at position 0 and \"MIN(person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.", + "Error during planning: Projections require unique expression names but the expression \"person.state AS a\" at position 0 and \"min(person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.", err.strip_backtrace() ); } @@ -1429,8 +1429,8 @@ fn select_simple_aggregate_with_groupby_with_aliases_repeated() { fn select_simple_aggregate_with_groupby_column_unselected() { quick_test( "SELECT MIN(age), MAX(age) FROM person GROUP BY state", - "Projection: MIN(person.age), MAX(person.age)\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age), MAX(person.age)]]\ + "Projection: min(person.age), max(person.age)\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age), max(person.age)]]\ \n TableScan: person", ); } @@ -1514,8 +1514,8 @@ fn recursive_ctes_disabled() { fn select_simple_aggregate_with_groupby_and_column_is_in_aggregate_and_groupby() { quick_test( "SELECT MAX(first_name) FROM person GROUP BY first_name", - "Projection: MAX(person.first_name)\ - \n Aggregate: groupBy=[[person.first_name]], aggr=[[MAX(person.first_name)]]\ + "Projection: max(person.first_name)\ + \n Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.first_name)]]\ \n TableScan: person", ); } @@ -1557,8 +1557,8 @@ fn select_simple_aggregate_with_groupby_position_out_of_range() { fn select_simple_aggregate_with_groupby_can_use_alias() { quick_test( "SELECT state AS a, MIN(age) AS b FROM person GROUP BY a", - "Projection: person.state AS a, MIN(person.age) AS b\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age)]]\ + "Projection: person.state AS a, min(person.age) AS b\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1568,7 +1568,7 @@ fn select_simple_aggregate_with_groupby_aggregate_repeated() { let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: 
Projections require unique expression names but the expression \"MIN(person.age)\" at position 1 and \"MIN(person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.", + "Error during planning: Projections require unique expression names but the expression \"min(person.age)\" at position 1 and \"min(person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.", err.strip_backtrace() ); } @@ -1577,8 +1577,8 @@ fn select_simple_aggregate_with_groupby_aggregate_repeated() { fn select_simple_aggregate_with_groupby_aggregate_repeated_and_one_has_alias() { quick_test( "SELECT state, MIN(age), MIN(age) AS ma FROM person GROUP BY state", - "Projection: person.state, MIN(person.age), MIN(person.age) AS ma\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age)]]\ + "Projection: person.state, min(person.age), min(person.age) AS ma\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]\ \n TableScan: person", ) } @@ -1587,8 +1587,8 @@ fn select_simple_aggregate_with_groupby_aggregate_repeated_and_one_has_alias() { fn select_simple_aggregate_with_groupby_non_column_expression_unselected() { quick_test( "SELECT MIN(first_name) FROM person GROUP BY age + 1", - "Projection: MIN(person.first_name)\ - \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[MIN(person.first_name)]]\ + "Projection: min(person.first_name)\ + \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]]\ \n TableScan: person", ); } @@ -1597,14 +1597,14 @@ fn select_simple_aggregate_with_groupby_non_column_expression_unselected() { fn select_simple_aggregate_with_groupby_non_column_expression_selected_and_resolvable() { quick_test( "SELECT age + 1, MIN(first_name) FROM person GROUP BY age + 1", - "Projection: person.age + Int64(1), MIN(person.first_name)\ - \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[MIN(person.first_name)]]\ + "Projection: person.age + Int64(1), min(person.first_name)\ + \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]]\ \n TableScan: person", ); quick_test( "SELECT MIN(first_name), age + 1 FROM person GROUP BY age + 1", - "Projection: MIN(person.first_name), person.age + Int64(1)\ - \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[MIN(person.first_name)]]\ + "Projection: min(person.first_name), person.age + Int64(1)\ + \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]]\ \n TableScan: person", ); } @@ -1613,8 +1613,8 @@ fn select_simple_aggregate_with_groupby_non_column_expression_selected_and_resol fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_resolvable() { quick_test( "SELECT ((age + 1) / 2) * (age + 1), MIN(first_name) FROM person GROUP BY age + 1", - "Projection: person.age + Int64(1) / Int64(2) * person.age + Int64(1), MIN(person.first_name)\ - \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[MIN(person.first_name)]]\ + "Projection: person.age + Int64(1) / Int64(2) * person.age + Int64(1), min(person.first_name)\ + \n Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]]\ \n TableScan: person", ); } @@ -1626,7 +1626,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_not_res let sql = "SELECT ((age + 1) / 2) * (age + 9), MIN(first_name) FROM person GROUP BY age + 1"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projection references non-aggregate values: Expression person.age could not 
be resolved from available columns: person.age + Int64(1), MIN(person.first_name)", + "Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)", err.strip_backtrace() ); } @@ -1636,7 +1636,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_sel let sql = "SELECT age, MIN(first_name) FROM person GROUP BY age + 1"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), MIN(person.first_name)", + "Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)", err.strip_backtrace() ); } @@ -1645,8 +1645,8 @@ fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_sel fn select_simple_aggregate_nested_in_binary_expr_with_groupby() { quick_test( "SELECT state, MIN(age) < 10 FROM person GROUP BY state", - "Projection: person.state, MIN(person.age) < Int64(10)\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age)]]\ + "Projection: person.state, min(person.age) < Int64(10)\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]\ \n TableScan: person", ); } @@ -1655,8 +1655,8 @@ fn select_simple_aggregate_nested_in_binary_expr_with_groupby() { fn select_simple_aggregate_and_nested_groupby_column() { quick_test( "SELECT age + 1, MAX(first_name) FROM person GROUP BY age", - "Projection: person.age + Int64(1), MAX(person.first_name)\ - \n Aggregate: groupBy=[[person.age]], aggr=[[MAX(person.first_name)]]\ + "Projection: person.age + Int64(1), max(person.first_name)\ + \n Aggregate: groupBy=[[person.age]], aggr=[[max(person.first_name)]]\ \n TableScan: person", ); } @@ -1665,8 +1665,8 @@ fn select_simple_aggregate_and_nested_groupby_column() { fn select_aggregate_compounded_with_groupby_column() { quick_test( "SELECT age + MIN(salary) FROM person GROUP BY age", - "Projection: person.age + MIN(person.salary)\ - \n Aggregate: groupBy=[[person.age]], aggr=[[MIN(person.salary)]]\ + "Projection: person.age + min(person.salary)\ + \n Aggregate: groupBy=[[person.age]], aggr=[[min(person.salary)]]\ \n TableScan: person", ); } @@ -1675,8 +1675,8 @@ fn select_aggregate_compounded_with_groupby_column() { fn select_aggregate_with_non_column_inner_expression_with_groupby() { quick_test( "SELECT state, MIN(age + 1) FROM person GROUP BY state", - "Projection: person.state, MIN(person.age + Int64(1))\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MIN(person.age + Int64(1))]]\ + "Projection: person.state, min(person.age + Int64(1))\ + \n Aggregate: groupBy=[[person.state]], aggr=[[min(person.age + Int64(1))]]\ \n TableScan: person", ); } @@ -1864,8 +1864,8 @@ fn select_group_by() { #[test] fn select_group_by_columns_not_in_select() { let sql = "SELECT MAX(age) FROM person GROUP BY state"; - let expected = "Projection: MAX(person.age)\ - \n Aggregate: groupBy=[[person.state]], aggr=[[MAX(person.age)]]\ + let expected = "Projection: max(person.age)\ + \n Aggregate: groupBy=[[person.state]], aggr=[[max(person.age)]]\ \n TableScan: person"; quick_test(sql, expected); @@ -1895,8 +1895,8 @@ fn select_group_by_needs_projection() { #[test] fn select_7480_1() { let sql = "SELECT c1, MIN(c12) FROM aggregate_test_100 GROUP BY c1, 
c13"; - let expected = "Projection: aggregate_test_100.c1, MIN(aggregate_test_100.c12)\ - \n Aggregate: groupBy=[[aggregate_test_100.c1, aggregate_test_100.c13]], aggr=[[MIN(aggregate_test_100.c12)]]\ + let expected = "Projection: aggregate_test_100.c1, min(aggregate_test_100.c12)\ + \n Aggregate: groupBy=[[aggregate_test_100.c1, aggregate_test_100.c13]], aggr=[[min(aggregate_test_100.c12)]]\ \n TableScan: aggregate_test_100"; quick_test(sql, expected); } @@ -1906,7 +1906,7 @@ fn select_7480_2() { let sql = "SELECT c1, c13, MIN(c12) FROM aggregate_test_100 GROUP BY c1"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c13 could not be resolved from available columns: aggregate_test_100.c1, MIN(aggregate_test_100.c12)", + "Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c13 could not be resolved from available columns: aggregate_test_100.c1, min(aggregate_test_100.c12)", err.strip_backtrace() ); } @@ -2319,8 +2319,8 @@ fn union_with_incompatible_data_types() { fn empty_over() { let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id, max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2329,8 +2329,8 @@ fn empty_over() { fn empty_over_with_alias() { let sql = "SELECT order_id oid, MAX(order_id) OVER () max_oid from orders"; let expected = "\ - Projection: orders.order_id AS oid, MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid\ - \n WindowAggr: windowExpr=[[MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id AS oid, max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid\ + \n WindowAggr: windowExpr=[[max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2339,8 +2339,8 @@ fn empty_over_with_alias() { fn empty_over_dup_with_alias() { let sql = "SELECT order_id oid, MAX(order_id) OVER () max_oid, MAX(order_id) OVER () max_oid_dup from orders"; let expected = "\ - Projection: orders.order_id AS oid, MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid, MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid_dup\ - \n WindowAggr: windowExpr=[[MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id AS oid, max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid, max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max_oid_dup\ + \n WindowAggr: windowExpr=[[max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2349,9 +2349,9 @@ fn empty_over_dup_with_alias() { fn empty_over_dup_with_different_sort() { let sql = "SELECT order_id oid, MAX(order_id) OVER (), MAX(order_id) OVER (ORDER BY 
order_id) from orders"; let expected = "\ - Projection: orders.order_id AS oid, MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MAX(orders.order_id) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n WindowAggr: windowExpr=[[MAX(orders.order_id) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id AS oid, max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, max(orders.order_id) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.order_id) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + \n WindowAggr: windowExpr=[[max(orders.order_id) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2360,8 +2360,8 @@ fn empty_over_dup_with_different_sort() { fn empty_over_plus() { let sql = "SELECT order_id, MAX(qty * 1.1) OVER () from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty * Float64(1.1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(orders.qty * Float64(1.1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id, max(orders.qty * Float64(1.1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(orders.qty * Float64(1.1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2370,8 +2370,8 @@ fn empty_over_plus() { fn empty_over_multiple() { let sql = "SELECT order_id, MAX(qty) OVER (), min(qty) over (), avg(qty) OVER () from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, avg(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, avg(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id, max(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, avg(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, avg(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2389,8 +2389,8 @@ fn empty_over_multiple() { fn over_partition_by() { let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id] ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\ + \n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2411,9 +2411,9 @@ fn over_partition_by() { fn over_order_by() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id), MIN(qty) OVER (ORDER BY order_id DESC) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2422,9 +2422,9 @@ fn over_order_by() { fn over_order_by_with_window_frame_double_end() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id ROWS BETWEEN 3 PRECEDING and 3 FOLLOWING), MIN(qty) OVER (ORDER BY order_id DESC) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING, MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING, min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2433,9 +2433,9 @@ fn over_order_by_with_window_frame_double_end() { fn over_order_by_with_window_frame_single_end() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id ROWS 3 PRECEDING), MIN(qty) OVER (ORDER BY order_id DESC) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND CURRENT ROW, MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND 
CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND CURRENT ROW, min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] ROWS BETWEEN 3 PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2444,9 +2444,9 @@ fn over_order_by_with_window_frame_single_end() { fn over_order_by_with_window_frame_single_end_groups() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id GROUPS 3 PRECEDING), MIN(qty) OVER (ORDER BY order_id DESC) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] GROUPS BETWEEN 3 PRECEDING AND CURRENT ROW, MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] GROUPS BETWEEN 3 PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] GROUPS BETWEEN 3 PRECEDING AND CURRENT ROW, min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] GROUPS BETWEEN 3 PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2467,9 +2467,9 @@ fn over_order_by_with_window_frame_single_end_groups() { fn over_order_by_two_sort_keys() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id), MIN(qty) OVER (ORDER BY (order_id + 1)) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, MIN(orders.qty) ORDER BY [orders.order_id + Int64(1) ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id + Int64(1) ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(orders.qty) ORDER BY [orders.order_id + Int64(1) ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id + Int64(1) ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2491,10 +2491,10 @@ fn 
over_order_by_two_sort_keys() { fn over_order_by_sort_keys_sorting() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY qty, order_id), sum(qty) OVER (), MIN(qty) OVER (ORDER BY order_id, qty) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ \n WindowAggr: windowExpr=[[sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2514,10 +2514,10 @@ fn over_order_by_sort_keys_sorting() { fn over_order_by_sort_keys_sorting_prefix_compacting() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id), sum(qty) OVER (), MIN(qty) OVER (ORDER BY order_id, qty) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ \n WindowAggr: windowExpr=[[sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); 
} @@ -2543,10 +2543,10 @@ fn over_order_by_sort_keys_sorting_global_order_compacting() { let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY qty, order_id), sum(qty) OVER (), MIN(qty) OVER (ORDER BY order_id, qty) from orders ORDER BY order_id"; let expected = "\ Sort: orders.order_id ASC NULLS LAST\ - \n Projection: orders.order_id, MAX(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n Projection: orders.order_id, max(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ \n WindowAggr: windowExpr=[[sum(orders.qty) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.qty ASC NULLS LAST, orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST, orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2565,8 +2565,8 @@ fn over_partition_by_order_by() { let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id ORDER BY qty) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2585,8 +2585,8 @@ fn over_partition_by_order_by_no_dup() { let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY 
[orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2608,9 +2608,9 @@ fn over_partition_by_order_by_mix_up() { let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id, qty ORDER BY qty), MIN(qty) OVER (PARTITION BY qty ORDER BY order_id) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, MIN(orders.qty) PARTITION BY [orders.qty] ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) PARTITION BY [orders.qty] ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(orders.qty) PARTITION BY [orders.qty] ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[min(orders.qty) PARTITION BY [orders.qty] ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -2631,9 +2631,9 @@ fn over_partition_by_order_by_mix_up_prefix() { let sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY order_id ORDER BY qty), MIN(qty) OVER (PARTITION BY order_id, qty ORDER BY price) from orders"; let expected = "\ - Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, MIN(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.price ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ - \n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[MIN(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.price ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.price ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\ + \n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY [orders.order_id] ORDER BY [orders.qty ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ + \n WindowAggr: windowExpr=[[min(orders.qty) PARTITION BY [orders.order_id, orders.qty] ORDER BY [orders.price ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ \n TableScan: orders"; quick_test(sql, expected); } @@ -3042,8 +3042,8 @@ fn scalar_subquery() { let expected = "Projection: p.id, ()\ \n Subquery:\ - \n Projection: MAX(person.id)\ - \n Aggregate: groupBy=[[]], 
aggr=[[MAX(person.id)]]\ + \n Projection: max(person.id)\ + \n Aggregate: groupBy=[[]], aggr=[[max(person.id)]]\ \n Filter: person.last_name = outer_ref(p.last_name)\ \n TableScan: person\ \n SubqueryAlias: p\ @@ -4002,8 +4002,8 @@ fn test_prepare_statement_infer_types_subquery() { Projection: person.id, person.age Filter: person.age = () Subquery: - Projection: MAX(person.age) - Aggregate: groupBy=[[]], aggr=[[MAX(person.age)]] + Projection: max(person.age) + Aggregate: groupBy=[[]], aggr=[[max(person.age)]] Filter: person.id = $1 TableScan: person TableScan: person @@ -4023,8 +4023,8 @@ Projection: person.id, person.age Projection: person.id, person.age Filter: person.age = () Subquery: - Projection: MAX(person.age) - Aggregate: groupBy=[[]], aggr=[[MAX(person.age)]] + Projection: max(person.age) + Aggregate: groupBy=[[]], aggr=[[max(person.age)]] Filter: person.id = UInt32(10) TableScan: person TableScan: person @@ -4396,7 +4396,7 @@ fn test_field_not_found_window_function() { let qualified_sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY orders.order_id) from orders"; - let expected = "Projection: orders.order_id, MAX(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n WindowAggr: windowExpr=[[MAX(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\n TableScan: orders"; + let expected = "Projection: orders.order_id, max(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n WindowAggr: windowExpr=[[max(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\n TableScan: orders"; quick_test(qualified_sql, expected); } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 6513258f879e2..9625f02afbb48 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4645,16 +4645,16 @@ query TT EXPLAIN SELECT max(c1), c2, c3 FROM aggregate_test_100 group by c2, c3 limit 5; ---- logical_plan -01)Projection: MAX(aggregate_test_100.c1), aggregate_test_100.c2, aggregate_test_100.c3 +01)Projection: max(aggregate_test_100.c1), aggregate_test_100.c2, aggregate_test_100.c3 02)--Limit: skip=0, fetch=5 -03)----Aggregate: groupBy=[[aggregate_test_100.c2, aggregate_test_100.c3]], aggr=[[MAX(aggregate_test_100.c1)]] +03)----Aggregate: groupBy=[[aggregate_test_100.c2, aggregate_test_100.c3]], aggr=[[max(aggregate_test_100.c1)]] 04)------TableScan: aggregate_test_100 projection=[c1, c2, c3] physical_plan -01)ProjectionExec: expr=[MAX(aggregate_test_100.c1)@2 as MAX(aggregate_test_100.c1), c2@0 as c2, c3@1 as c3] +01)ProjectionExec: expr=[max(aggregate_test_100.c1)@2 as max(aggregate_test_100.c1), c2@0 as c2, c3@1 as c3] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----AggregateExec: mode=Final, gby=[c2@0 as c2, c3@1 as c3], aggr=[MAX(aggregate_test_100.c1)] +03)----AggregateExec: mode=Final, gby=[c2@0 as c2, c3@1 as c3], aggr=[max(aggregate_test_100.c1)] 04)------CoalescePartitionsExec -05)--------AggregateExec: mode=Partial, gby=[c2@1 as c2, c3@2 as c3], aggr=[MAX(aggregate_test_100.c1)] +05)--------AggregateExec: mode=Partial, gby=[c2@1 as c2, c3@2 as c3], aggr=[max(aggregate_test_100.c1)] 06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, 
c3], has_header=true @@ -5280,20 +5280,20 @@ query TT EXPLAIN SELECT MIN(col0) FROM empty; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[MIN(empty.col0)]] +01)Aggregate: groupBy=[[]], aggr=[[min(empty.col0)]] 02)--TableScan: empty projection=[col0] physical_plan -01)ProjectionExec: expr=[NULL as MIN(empty.col0)] +01)ProjectionExec: expr=[NULL as min(empty.col0)] 02)--PlaceholderRowExec query TT EXPLAIN SELECT MAX(col0) FROM empty; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[MAX(empty.col0)]] +01)Aggregate: groupBy=[[]], aggr=[[max(empty.col0)]] 02)--TableScan: empty projection=[col0] physical_plan -01)ProjectionExec: expr=[NULL as MAX(empty.col0)] +01)ProjectionExec: expr=[NULL as max(empty.col0)] 02)--PlaceholderRowExec statement ok diff --git a/datafusion/sqllogictest/test_files/aggregates_topk.slt b/datafusion/sqllogictest/test_files/aggregates_topk.slt index 03555989a277b..8e67f501dbd76 100644 --- a/datafusion/sqllogictest/test_files/aggregates_topk.slt +++ b/datafusion/sqllogictest/test_files/aggregates_topk.slt @@ -41,18 +41,18 @@ explain select trace_id, MAX(timestamp) from traces group by trace_id order by M ---- logical_plan 01)Limit: skip=0, fetch=4 -02)--Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +02)--Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 +03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] 04)------TableScan: traces projection=[trace_id, timestamp] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +02)--SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 +03)----SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] +04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] @@ -96,18 +96,18 @@ explain select trace_id, MAX(timestamp) from traces group by trace_id order by M ---- logical_plan 01)Limit: skip=0, fetch=4 -02)--Sort: MAX(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +02)--Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 +03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] 04)------TableScan: traces projection=[trace_id, timestamp] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [MAX(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] +02)--SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 +03)----SortExec: 
TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] +04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4] +08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] query TT @@ -115,18 +115,18 @@ explain select trace_id, MIN(timestamp) from traces group by trace_id order by M ---- logical_plan 01)Limit: skip=0, fetch=4 -02)--Sort: MIN(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MIN(traces.timestamp)]] +02)--Sort: min(traces.timestamp) DESC NULLS FIRST, fetch=4 +03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[min(traces.timestamp)]] 04)------TableScan: traces projection=[trace_id, timestamp] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [MIN(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[MIN(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] +02)--SortPreservingMergeExec: [min(traces.timestamp)@1 DESC], fetch=4 +03)----SortExec: TopK(fetch=4), expr=[min(traces.timestamp)@1 DESC], preserve_partitioning=[true] +04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)] +08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] query TT @@ -134,18 +134,18 @@ explain select trace_id, MAX(timestamp) from traces group by trace_id order by M ---- logical_plan 01)Limit: skip=0, fetch=4 -02)--Sort: MAX(traces.timestamp) ASC NULLS LAST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +02)--Sort: max(traces.timestamp) ASC NULLS LAST, fetch=4 +03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] 04)------TableScan: traces projection=[trace_id, timestamp] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [MAX(traces.timestamp)@1 ASC NULLS LAST], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[MAX(traces.timestamp)@1 ASC NULLS LAST], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +02)--SortPreservingMergeExec: [max(traces.timestamp)@1 ASC NULLS LAST], fetch=4 +03)----SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 ASC NULLS LAST], preserve_partitioning=[true] +04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: 
partitioning=Hash([trace_id@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] query TT @@ -154,21 +154,21 @@ explain select trace_id, MAX(timestamp) from traces group by trace_id order by t logical_plan 01)Limit: skip=0, fetch=4 02)--Sort: traces.trace_id ASC NULLS LAST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[MAX(traces.timestamp)]] +03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] 04)------TableScan: traces projection=[trace_id, timestamp] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 02)--SortPreservingMergeExec: [trace_id@0 ASC NULLS LAST], fetch=4 03)----SortExec: TopK(fetch=4), expr=[trace_id@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)] +08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] query TI -select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; +select trace_id, max(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ---- c 4 b 3 @@ -176,7 +176,7 @@ a 1 NULL 0 query TI -select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; +select trace_id, min(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 4; ---- b -2 a -1 @@ -184,21 +184,21 @@ NULL 0 c 2 query TI -select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 3; +select trace_id, max(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 3; ---- c 4 b 3 a 1 query TI -select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 3; +select trace_id, min(timestamp) from traces group by trace_id order by MIN(timestamp) asc limit 3; ---- b -2 a -1 NULL 0 query TII -select trace_id, other, MIN(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; +select trace_id, other, min(timestamp) from traces group by trace_id, other order by MIN(timestamp) asc limit 4; ---- b 0 -2 a -1 -1 @@ -206,7 +206,7 @@ NULL 0 0 a 1 1 query TII -select trace_id, MIN(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; +select trace_id, min(other), MIN(timestamp) from traces group by trace_id order by MIN(timestamp), MIN(other) limit 4; ---- b 0 -2 a -1 -1 diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index b08d329d4a863..1d5f9ba23d580 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -2606,12 
+2606,12 @@ query TT explain select min(a) filter (where a > 1) as x from t; ---- logical_plan -01)Projection: MIN(t.a) FILTER (WHERE t.a > Int64(1)) AS x -02)--Aggregate: groupBy=[[]], aggr=[[MIN(t.a) FILTER (WHERE t.a > Float32(1)) AS MIN(t.a) FILTER (WHERE t.a > Int64(1))]] +01)Projection: min(t.a) FILTER (WHERE t.a > Int64(1)) AS x +02)--Aggregate: groupBy=[[]], aggr=[[min(t.a) FILTER (WHERE t.a > Float32(1)) AS min(t.a) FILTER (WHERE t.a > Int64(1))]] 03)----TableScan: t projection=[a] physical_plan -01)ProjectionExec: expr=[MIN(t.a) FILTER (WHERE t.a > Int64(1))@0 as x] -02)--AggregateExec: mode=Single, gby=[], aggr=[MIN(t.a) FILTER (WHERE t.a > Int64(1))] +01)ProjectionExec: expr=[min(t.a) FILTER (WHERE t.a > Int64(1))@0 as x] +02)--AggregateExec: mode=Single, gby=[], aggr=[min(t.a) FILTER (WHERE t.a > Int64(1))] 03)----MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index a97c979c43a34..bd096f61fb5da 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4169,33 +4169,33 @@ query TT EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT x) FROM t1 GROUP BY y; ---- logical_plan -01)Projection: sum(DISTINCT t1.x), MAX(DISTINCT t1.x) -02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(DISTINCT CAST(t1.x AS Float64)), MAX(DISTINCT t1.x)]] +01)Projection: sum(DISTINCT t1.x), max(DISTINCT t1.x) +02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(DISTINCT CAST(t1.x AS Float64)), max(DISTINCT t1.x)]] 03)----TableScan: t1 projection=[x, y] physical_plan -01)ProjectionExec: expr=[sum(DISTINCT t1.x)@1 as sum(DISTINCT t1.x), MAX(DISTINCT t1.x)@2 as MAX(DISTINCT t1.x)] -02)--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[sum(DISTINCT t1.x), MAX(DISTINCT t1.x)] +01)ProjectionExec: expr=[sum(DISTINCT t1.x)@1 as sum(DISTINCT t1.x), max(DISTINCT t1.x)@2 as max(DISTINCT t1.x)] +02)--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[sum(DISTINCT t1.x), max(DISTINCT t1.x)] 03)----CoalesceBatchesExec: target_batch_size=2 04)------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8 05)--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -06)----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[sum(DISTINCT t1.x), MAX(DISTINCT t1.x)] +06)----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[sum(DISTINCT t1.x), max(DISTINCT t1.x)] 07)------------MemoryExec: partitions=1, partition_sizes=[1] query TT EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) FROM t1 GROUP BY y; ---- logical_plan -01)Projection: sum(alias1) AS sum(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x) -02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), MAX(alias1)]] +01)Projection: sum(alias1) AS sum(DISTINCT t1.x), max(alias1) AS max(DISTINCT t1.x) +02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), max(alias1)]] 03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS t1.x AS alias1]], aggr=[[]] 04)------Projection: CAST(t1.x AS Float64) AS __common_expr_1, t1.y 05)--------TableScan: t1 projection=[x, y] physical_plan -01)ProjectionExec: expr=[sum(alias1)@1 as sum(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)] -02)--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[sum(alias1), MAX(alias1)] +01)ProjectionExec: expr=[sum(alias1)@1 as sum(DISTINCT t1.x), max(alias1)@2 as max(DISTINCT t1.x)] +02)--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], 
aggr=[sum(alias1), max(alias1)] 03)----CoalesceBatchesExec: target_batch_size=2 04)------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8 -05)--------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[sum(alias1), MAX(alias1)] +05)--------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[sum(alias1), max(alias1)] 06)----------AggregateExec: mode=FinalPartitioned, gby=[y@0 as y, alias1@1 as alias1], aggr=[] 07)------------CoalesceBatchesExec: target_batch_size=2 08)--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8 @@ -4396,18 +4396,18 @@ EXPLAIN SELECT c1, count(distinct c2), min(distinct c2), sum(c3), max(c4) FROM a ---- logical_plan 01)Sort: aggregate_test_100.c1 ASC NULLS LAST -02)--Projection: aggregate_test_100.c1, count(alias1) AS count(DISTINCT aggregate_test_100.c2), MIN(alias1) AS MIN(DISTINCT aggregate_test_100.c2), sum(alias2) AS sum(aggregate_test_100.c3), MAX(alias3) AS MAX(aggregate_test_100.c4) -03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[count(alias1), MIN(alias1), sum(alias2), MAX(alias3)]] -04)------Aggregate: groupBy=[[aggregate_test_100.c1, aggregate_test_100.c2 AS alias1]], aggr=[[sum(CAST(aggregate_test_100.c3 AS Int64)) AS alias2, MAX(aggregate_test_100.c4) AS alias3]] +02)--Projection: aggregate_test_100.c1, count(alias1) AS count(DISTINCT aggregate_test_100.c2), min(alias1) AS min(DISTINCT aggregate_test_100.c2), sum(alias2) AS sum(aggregate_test_100.c3), max(alias3) AS max(aggregate_test_100.c4) +03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[count(alias1), min(alias1), sum(alias2), max(alias3)]] +04)------Aggregate: groupBy=[[aggregate_test_100.c1, aggregate_test_100.c2 AS alias1]], aggr=[[sum(CAST(aggregate_test_100.c3 AS Int64)) AS alias2, max(aggregate_test_100.c4) AS alias3]] 05)--------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4] physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST] 02)--SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[c1@0 as c1, count(alias1)@1 as count(DISTINCT aggregate_test_100.c2), MIN(alias1)@2 as MIN(DISTINCT aggregate_test_100.c2), sum(alias2)@3 as sum(aggregate_test_100.c3), MAX(alias3)@4 as MAX(aggregate_test_100.c4)] -04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[count(alias1), MIN(alias1), sum(alias2), MAX(alias3)] +03)----ProjectionExec: expr=[c1@0 as c1, count(alias1)@1 as count(DISTINCT aggregate_test_100.c2), min(alias1)@2 as min(DISTINCT aggregate_test_100.c2), sum(alias2)@3 as sum(aggregate_test_100.c3), max(alias3)@4 as max(aggregate_test_100.c4)] +04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[count(alias1), min(alias1), sum(alias2), max(alias3)] 05)--------CoalesceBatchesExec: target_batch_size=2 06)----------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 -07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[count(alias1), MIN(alias1), sum(alias2), MAX(alias3)] +07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[count(alias1), min(alias1), sum(alias2), max(alias3)] 08)--------------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1, alias1@1 as alias1], aggr=[alias2, alias3] 09)----------------CoalesceBatchesExec: target_batch_size=2 10)------------------RepartitionExec: partitioning=Hash([c1@0, alias1@1], 8), input_partitions=8 @@ -4576,17 +4576,17 @@ LIMIT 4; ---- logical_plan 01)Limit: skip=0, fetch=4 -02)--Sort: MAX(timestamp_table.t1) DESC NULLS FIRST, fetch=4 
-03)----Aggregate: groupBy=[[timestamp_table.c2]], aggr=[[MAX(timestamp_table.t1)]] +02)--Sort: max(timestamp_table.t1) DESC NULLS FIRST, fetch=4 +03)----Aggregate: groupBy=[[timestamp_table.c2]], aggr=[[max(timestamp_table.t1)]] 04)------TableScan: timestamp_table projection=[t1, c2] physical_plan 01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [MAX(timestamp_table.t1)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[MAX(timestamp_table.t1)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[MAX(timestamp_table.t1)], lim=[4] +02)--SortPreservingMergeExec: [max(timestamp_table.t1)@1 DESC], fetch=4 +03)----SortExec: TopK(fetch=4), expr=[max(timestamp_table.t1)@1 DESC], preserve_partitioning=[true] +04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[max(timestamp_table.t1)], lim=[4] 05)--------CoalesceBatchesExec: target_batch_size=2 06)----------RepartitionExec: partitioning=Hash([c2@0], 8), input_partitions=8 -07)------------AggregateExec: mode=Partial, gby=[c2@1 as c2], aggr=[MAX(timestamp_table.t1)], lim=[4] +07)------------AggregateExec: mode=Partial, gby=[c2@1 as c2], aggr=[max(timestamp_table.t1)], lim=[4] 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=4 09)----------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/0.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/2.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/3.csv]]}, projection=[t1, c2], has_header=true diff --git a/datafusion/sqllogictest/test_files/tpch/q15.slt.part b/datafusion/sqllogictest/test_files/tpch/q15.slt.part index 630237779bd71..087ad3ffbba04 100644 --- a/datafusion/sqllogictest/test_files/tpch/q15.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q15.slt.part @@ -52,7 +52,7 @@ order by logical_plan 01)Sort: supplier.s_suppkey ASC NULLS LAST 02)--Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue -03)----Inner Join: revenue0.total_revenue = __scalar_sq_1.MAX(revenue0.total_revenue) +03)----Inner Join: revenue0.total_revenue = __scalar_sq_1.max(revenue0.total_revenue) 04)------Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue 05)--------Inner Join: supplier.s_suppkey = revenue0.supplier_no 06)----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] @@ -63,7 +63,7 @@ logical_plan 11)------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01") 12)--------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")] 13)------SubqueryAlias: __scalar_sq_1 -14)--------Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]] +14)--------Aggregate: groupBy=[[]], aggr=[[max(revenue0.total_revenue)]] 15)----------SubqueryAlias: revenue0 16)------------Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue 17)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] @@ -74,7 +74,7 @@ physical_plan 01)SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] 02)--SortExec: expr=[s_suppkey@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----CoalesceBatchesExec: target_batch_size=8192 -04)------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(total_revenue@4, MAX(revenue0.total_revenue)@0)], projection=[s_suppkey@0, s_name@1, s_address@2, s_phone@3, total_revenue@4] +04)------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(total_revenue@4, max(revenue0.total_revenue)@0)], projection=[s_suppkey@0, s_name@1, s_address@2, s_phone@3, total_revenue@4] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([total_revenue@4], 4), input_partitions=4 07)------------CoalesceBatchesExec: target_batch_size=8192 @@ -93,10 +93,10 @@ physical_plan 20)------------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01 21)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false 22)--------CoalesceBatchesExec: target_batch_size=8192 -23)----------RepartitionExec: partitioning=Hash([MAX(revenue0.total_revenue)@0], 4), input_partitions=1 -24)------------AggregateExec: mode=Final, gby=[], aggr=[MAX(revenue0.total_revenue)] +23)----------RepartitionExec: partitioning=Hash([max(revenue0.total_revenue)@0], 4), input_partitions=1 +24)------------AggregateExec: mode=Final, gby=[], aggr=[max(revenue0.total_revenue)] 25)--------------CoalescePartitionsExec -26)----------------AggregateExec: mode=Partial, gby=[], aggr=[MAX(revenue0.total_revenue)] +26)----------------AggregateExec: mode=Partial, gby=[], aggr=[max(revenue0.total_revenue)] 27)------------------ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] 28)--------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 29)----------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q2.slt.part b/datafusion/sqllogictest/test_files/tpch/q2.slt.part index 1b1293cd250e6..85dfefcd03f46 100644 --- a/datafusion/sqllogictest/test_files/tpch/q2.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q2.slt.part @@ -66,7 +66,7 @@ logical_plan 01)Limit: skip=0, fetch=10 02)--Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=10 03)----Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment -04)------Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.MIN(partsupp.ps_supplycost) +04)------Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) 05)--------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, 
supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name 06)----------Inner Join: nation.n_regionkey = region.r_regionkey 07)------------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey @@ -85,8 +85,8 @@ logical_plan 20)--------------Filter: region.r_name = Utf8("EUROPE") 21)----------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] 22)--------SubqueryAlias: __scalar_sq_1 -23)----------Projection: MIN(partsupp.ps_supplycost), partsupp.ps_partkey -24)------------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]] +23)----------Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey +24)------------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] 25)--------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost 26)----------------Inner Join: nation.n_regionkey = region.r_regionkey 27)------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey @@ -105,7 +105,7 @@ physical_plan 03)----SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] 04)------ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] 05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, MIN(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] +06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 09)----------------CoalesceBatchesExec: target_batch_size=8192 @@ -149,12 +149,12 @@ physical_plan 47)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 48)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false 49)------------CoalesceBatchesExec: target_batch_size=8192 -50)--------------RepartitionExec: partitioning=Hash([ps_partkey@1, MIN(partsupp.ps_supplycost)@0], 4), input_partitions=4 -51)----------------ProjectionExec: expr=[MIN(partsupp.ps_supplycost)@1 as MIN(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] -52)------------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] +50)--------------RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 +51)----------------ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] +52)------------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], 
aggr=[min(partsupp.ps_supplycost)] 53)--------------------CoalesceBatchesExec: target_batch_size=8192 54)----------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -55)------------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] +55)------------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] 56)--------------------------CoalesceBatchesExec: target_batch_size=8192 57)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@2, r_regionkey@0)], projection=[ps_partkey@0, ps_supplycost@1] 58)------------------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 126996e7398aa..aedbee35400c2 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -588,8 +588,8 @@ logical_plan 06)----------EmptyRelation 07)--Projection: b.x AS count, b.y AS n 08)----SubqueryAlias: b -09)------Projection: Int64(1) AS x, MAX(Int64(10)) AS y -10)--------Aggregate: groupBy=[[]], aggr=[[MAX(Int64(10))]] +09)------Projection: Int64(1) AS x, max(Int64(10)) AS y +10)--------Aggregate: groupBy=[[]], aggr=[[max(Int64(10))]] 11)----------EmptyRelation physical_plan 01)UnionExec @@ -600,8 +600,8 @@ physical_plan 06)----------AggregateExec: mode=Partial, gby=[n@0 as n], aggr=[count(*)], ordering_mode=Sorted 07)------------ProjectionExec: expr=[5 as n] 08)--------------PlaceholderRowExec -09)--ProjectionExec: expr=[1 as count, MAX(Int64(10))@0 as n] -10)----AggregateExec: mode=Single, gby=[], aggr=[MAX(Int64(10))] +09)--ProjectionExec: expr=[1 as count, max(Int64(10))@0 as n] +10)----AggregateExec: mode=Single, gby=[], aggr=[max(Int64(10))] 11)------PlaceholderRowExec diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index 3d455d7a88ca7..59133379d4431 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -53,8 +53,8 @@ logical_plan 01)Dml: op=[Update] table=[t1] 02)--Projection: t1.a AS a, () AS b, t1.c AS c, t1.d AS d 03)----Subquery: -04)------Projection: MAX(t2.b) -05)--------Aggregate: groupBy=[[]], aggr=[[MAX(t2.b)]] +04)------Projection: max(t2.b) +05)--------Aggregate: groupBy=[[]], aggr=[[max(t2.b)]] 06)----------Filter: outer_ref(t1.a) = t2.a 07)------------TableScan: t2 08)----TableScan: t1 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 212daa05a5d21..4f4b9749c561e 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -254,8 +254,8 @@ WITH _sample_data AS ( ---- logical_plan 01)Sort: d.b ASC NULLS LAST -02)--Projection: d.b, MAX(d.a) AS max_a -03)----Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a)]] +02)--Projection: d.b, max(d.a) AS max_a +03)----Aggregate: groupBy=[[d.b]], aggr=[[max(d.a)]] 04)------SubqueryAlias: d 05)--------SubqueryAlias: _data2 06)----------SubqueryAlias: s @@ -272,11 +272,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [b@0 ASC NULLS LAST] 02)--SortExec: expr=[b@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a] -04)------AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[MAX(d.a)] +03)----ProjectionExec: expr=[b@0 as b, 
max(d.a)@1 as max_a] +04)------AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[max(d.a)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[b@1 as b], aggr=[MAX(d.a)], ordering_mode=Sorted +07)------------AggregateExec: mode=Partial, gby=[b@1 as b], aggr=[max(d.a)], ordering_mode=Sorted 08)--------------UnionExec 09)----------------ProjectionExec: expr=[1 as a, aa as b] 10)------------------PlaceholderRowExec @@ -337,8 +337,8 @@ WITH _sample_data AS ( ---- logical_plan 01)Sort: d.b ASC NULLS LAST -02)--Projection: d.b, MAX(d.a) AS max_a, MAX(d.seq) -03)----Aggregate: groupBy=[[d.b]], aggr=[[MAX(d.a), MAX(d.seq)]] +02)--Projection: d.b, max(d.a) AS max_a, max(d.seq) +03)----Aggregate: groupBy=[[d.b]], aggr=[[max(d.a), max(d.seq)]] 04)------SubqueryAlias: d 05)--------SubqueryAlias: _data2 06)----------Projection: ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS seq, s.a, s.b @@ -356,8 +356,8 @@ logical_plan 18)----------------------EmptyRelation physical_plan 01)SortPreservingMergeExec: [b@0 ASC NULLS LAST] -02)--ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)] -03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[MAX(d.a), MAX(d.seq)], ordering_mode=Sorted +02)--ProjectionExec: expr=[b@0 as b, max(d.a)@1 as max_a, max(d.seq)@2 as max(d.seq)] +03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[max(d.a), max(d.seq)], ordering_mode=Sorted 04)------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] 05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST], preserve_partitioning=[true] @@ -1254,16 +1254,16 @@ query TT EXPLAIN SELECT c2, MAX(c9) OVER (ORDER BY c2), SUM(c9) OVER (), MIN(c9) OVER (ORDER BY c2, c9) from aggregate_test_100 ---- logical_plan -01)Projection: aggregate_test_100.c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +01)Projection: aggregate_test_100.c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW 02)--WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] -03)----WindowAggr: 
windowExpr=[[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -04)------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +03)----WindowAggr: windowExpr=[[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------WindowAggr: windowExpr=[[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 05)--------TableScan: aggregate_test_100 projection=[c2, c9] physical_plan -01)ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +01)ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 02)--WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -03)----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true @@ -1277,18 +1277,18 @@ EXPLAIN SELECT c2, MAX(c9) OVER (ORDER BY c9, c2), SUM(c9) OVER (), MIN(c9) OVER ---- logical_plan 01)Sort: aggregate_test_100.c2 ASC NULLS LAST -02)--Projection: aggregate_test_100.c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +02)--Projection: aggregate_test_100.c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW 03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] -04)------WindowAggr: windowExpr=[[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -05)--------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------WindowAggr: windowExpr=[[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 
+05)--------WindowAggr: windowExpr=[[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c2, c9] physical_plan 01)SortExec: expr=[c2@0 ASC NULLS LAST], preserve_partitioning=[false] -02)--ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +02)--ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -04)------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] -06)----------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 07)------------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true @@ -2569,21 +2569,21 @@ logical_plan 01)Projection: sum1, sum2, sum3, min1, min2, min3, max1, max2, max3, cnt1, cnt2, sumr1, sumr2, sumr3, minr1, minr2, minr3, maxr1, maxr2, maxr3, cntr1, cntr2, sum4, cnt3 02)--Limit: skip=0, fetch=5 03)----Sort: annotated_data_finite.inc_col DESC NULLS FIRST, fetch=5 -04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS sum3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS min1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS min2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS min3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS max1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS max2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt2, 
sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING AS sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING AS sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sumr3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS minr1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS minr2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS minr3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS maxr1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS maxr2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt3, annotated_data_finite.inc_col +04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS min2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING AS sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING AS sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 
FOLLOWING AS sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS maxr2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt3, annotated_data_finite.inc_col 05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] -06)----------Projection: __common_expr_1, annotated_data_finite.inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING -07)------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] -08)--------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, 
MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] +06)----------Projection: __common_expr_1, annotated_data_finite.inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING 
+07)------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] +08)--------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] 09)----------------Projection: CAST(annotated_data_finite.desc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS 
Int64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col, annotated_data_finite.desc_col 10)------------------TableScan: annotated_data_finite projection=[ts, inc_col, desc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, sum3@2 as sum3, min1@3 as min1, min2@4 as min2, min3@5 as min3, max1@6 as max1, max2@7 as max2, max3@8 as max3, cnt1@9 as cnt1, cnt2@10 as cnt2, sumr1@11 as sumr1, sumr2@12 as sumr2, sumr3@13 as sumr3, minr1@14 as minr1, minr2@15 as minr2, minr3@16 as minr3, maxr1@17 as maxr1, maxr2@18 as maxr2, maxr3@19 as maxr3, cntr1@20 as cntr1, cntr2@21 as cntr2, sum4@22 as sum4, cnt3@23 as cnt3] 02)--SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] -03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, 
count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] +03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(*) ROWS 
BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] 04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, 
count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, 
sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { 
name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] +05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as 
sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] 08)--------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true @@ -2731,17 +2731,17 @@ logical_plan 01)Projection: sum1, sum2, min1, min2, max1, max2, count1, count2, avg1, avg2 02)--Limit: skip=0, fetch=5 03)----Sort: annotated_data_finite.inc_col ASC NULLS LAST, fetch=5 -04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS min1, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS min2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS max1, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, avg(annotated_data_finite.inc_col) 
ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS avg2, annotated_data_finite.inc_col -05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] +04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS avg2, annotated_data_finite.inc_col +05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, 
min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING]] +06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] 07)------------Projection: CAST(annotated_data_finite.inc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS Float64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col 08)--------------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as min2, max1@4 as max1, max2@5 as max2, count1@6 as count1, count2@7 as count2, avg1@8 as avg1, avg2@9 as avg2] 02)--SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) 
ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: 
false }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] +03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, 
start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] 07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true @@ -3333,18 +3333,18 @@ EXPLAIN SELECT logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 -03)----Projection: aggregate_test_100.c3, MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max1 -04)------WindowAggr: windowExpr=[[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -05)--------Projection: aggregate_test_100.c3, aggregate_test_100.c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -06)----------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +03)----Projection: aggregate_test_100.c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max1 +04)------WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +05)--------Projection: aggregate_test_100.c3, aggregate_test_100.c12, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +06)----------WindowAggr: windowExpr=[[min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 07)------------TableScan: aggregate_test_100 projection=[c3, c11, c12] physical_plan 01)SortExec: TopK(fetch=5), expr=[c3@0 ASC NULLS LAST], preserve_partitioning=[false] -02)--ProjectionExec: expr=[c3@0 as c3, 
MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] -03)----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[c3@0 as c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] +03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] -05)--------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] -06)----------WindowAggExec: wdw=[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +05)--------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] +06)----------WindowAggExec: wdw=[min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 07)------------SortExec: expr=[c11@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c11, c12], has_header=true @@ -3379,14 +3379,14 @@ logical_plan 01)Projection: min1, max1 02)--Limit: skip=0, fetch=5 03)----Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 -04)------Projection: MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1, aggregate_test_100.c3 -05)--------WindowAggr: windowExpr=[[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1, aggregate_test_100.c3 +05)--------WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c3, c12] physical_plan 01)ProjectionExec: expr=[min1@0 as min1, max1@1 as max1] 02)--SortExec: TopK(fetch=5), expr=[c3@2 ASC NULLS LAST], preserve_partitioning=[false] -03)----ProjectionExec: expr=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3] -04)------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----ProjectionExec: expr=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3] +04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c12], has_header=true @@ -3486,16 +3486,16 @@ EXPLAIN SELECT MIN(d) OVER(ORDER BY c ASC) as min1, FROM multiple_ordered_table ---- logical_plan -01)Projection: MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1 -02)--WindowAggr: windowExpr=[[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -03)----Projection: multiple_ordered_table.c, multiple_ordered_table.d, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -04)------WindowAggr: windowExpr=[[MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +01)Projection: min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1 +02)--WindowAggr: windowExpr=[[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +03)----Projection: multiple_ordered_table.c, multiple_ordered_table.d, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +04)------WindowAggr: windowExpr=[[max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 05)--------TableScan: multiple_ordered_table projection=[a, b, c, d] physical_plan -01)ProjectionExec: expr=[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, 
multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max1] -02)--BoundedWindowAggExec: wdw=[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -03)----ProjectionExec: expr=[c@2 as c, d@3 as d, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -04)------BoundedWindowAggExec: wdw=[MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +01)ProjectionExec: expr=[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max1] +02)--BoundedWindowAggExec: wdw=[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----ProjectionExec: expr=[c@2 as c, d@3 as d, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +04)------BoundedWindowAggExec: wdw=[max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_orderings=[[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], has_header=true query TT @@ -3506,13 +3506,13 @@ FROM( WHERE d=0) ---- logical_plan -01)Projection: MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max_c -02)--WindowAggr: windowExpr=[[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +01)Projection: max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max_c +02)--WindowAggr: windowExpr=[[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 03)----Filter: multiple_ordered_table.d = Int32(0) 04)------TableScan: multiple_ordered_table projection=[c, d], partial_filters=[multiple_ordered_table.d = Int32(0)] physical_plan -01)ProjectionExec: expr=[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max_c] -02)--BoundedWindowAggExec: wdw=[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +01)ProjectionExec: expr=[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max_c] +02)--BoundedWindowAggExec: wdw=[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------FilterExec: d@1 = 0 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], output_ordering=[c@0 ASC NULLS LAST], has_header=true diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs 
b/datafusion/substrait/tests/cases/consumer_integration.rs index fc5f82127d052..360377c231a36 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -98,7 +98,7 @@ mod tests { \n Projection: FILENAME_PLACEHOLDER_1.s_acctbal, FILENAME_PLACEHOLDER_1.s_name, FILENAME_PLACEHOLDER_3.n_name, FILENAME_PLACEHOLDER_0.p_partkey, FILENAME_PLACEHOLDER_0.p_mfgr, FILENAME_PLACEHOLDER_1.s_address, FILENAME_PLACEHOLDER_1.s_phone, FILENAME_PLACEHOLDER_1.s_comment\ \n Filter: FILENAME_PLACEHOLDER_0.p_partkey = FILENAME_PLACEHOLDER_2.ps_partkey AND FILENAME_PLACEHOLDER_1.s_suppkey = FILENAME_PLACEHOLDER_2.ps_suppkey AND FILENAME_PLACEHOLDER_0.p_size = Int32(15) AND FILENAME_PLACEHOLDER_0.p_type LIKE CAST(Utf8(\"%BRASS\") AS Utf8) AND FILENAME_PLACEHOLDER_1.s_nationkey = FILENAME_PLACEHOLDER_3.n_nationkey AND FILENAME_PLACEHOLDER_3.n_regionkey = FILENAME_PLACEHOLDER_4.r_regionkey AND FILENAME_PLACEHOLDER_4.r_name = CAST(Utf8(\"EUROPE\") AS Utf8) AND FILENAME_PLACEHOLDER_2.ps_supplycost = ()\ \n Subquery:\ - \n Aggregate: groupBy=[[]], aggr=[[MIN(FILENAME_PLACEHOLDER_5.ps_supplycost)]]\ + \n Aggregate: groupBy=[[]], aggr=[[min(FILENAME_PLACEHOLDER_5.ps_supplycost)]]\ \n Projection: FILENAME_PLACEHOLDER_5.ps_supplycost\ \n Filter: FILENAME_PLACEHOLDER_5.ps_partkey = FILENAME_PLACEHOLDER_5.ps_partkey AND FILENAME_PLACEHOLDER_6.s_suppkey = FILENAME_PLACEHOLDER_5.ps_suppkey AND FILENAME_PLACEHOLDER_6.s_nationkey = FILENAME_PLACEHOLDER_7.n_nationkey AND FILENAME_PLACEHOLDER_7.n_regionkey = FILENAME_PLACEHOLDER_8.r_regionkey AND FILENAME_PLACEHOLDER_8.r_name = CAST(Utf8(\"EUROPE\") AS Utf8)\ \n Inner Join: Filter: Boolean(true)\ diff --git a/docs/source/library-user-guide/using-the-sql-api.md b/docs/source/library-user-guide/using-the-sql-api.md index 9c32004db4359..f78cf16f4cb67 100644 --- a/docs/source/library-user-guide/using-the-sql-api.md +++ b/docs/source/library-user-guide/using-the-sql-api.md @@ -52,13 +52,13 @@ async fn main() -> Result<()> { // register the "example" table ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; // create a plan to run a SQL query - let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?; + let df = ctx.sql("SELECT a, min(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?; // execute the plan and collect the results as Vec let results: Vec = df.collect().await?; // Use the assert_batches_eq macro to compare the results with expected output datafusion::assert_batches_eq!(vec![ "+---+----------------+", - "| a | MIN(example.b) |", + "| a | min(example.b) |", "+---+----------------+", "| 1 | 2 |", "+---+----------------+", From 5c4254aeb310731fff3fb29c95571e3db2efb41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 5 Aug 2024 22:50:40 +0800 Subject: [PATCH 218/357] Don't implement create_sliding_accumulator repeatedly (#11813) --- datafusion/expr/src/test/function_stub.rs | 7 ------- datafusion/functions-aggregate/src/average.rs | 7 ------- datafusion/functions-aggregate/src/bool_and_or.rs | 14 -------------- datafusion/functions-aggregate/src/regr.rs | 7 ------- 4 files changed, 35 deletions(-) diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index 22af84db3f86a..b4f768085fcc3 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -202,13 +202,6 @@ impl AggregateUDFImpl for Sum { 
unreachable!("stub should not have accumulate()") } - fn create_sliding_accumulator( - &self, - _args: AccumulatorArgs, - ) -> Result> { - unreachable!("stub should not have accumulate()") - } - fn reverse_expr(&self) -> ReversedUDAF { ReversedUDAF::Identical } diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index 228bce1979a38..288e0b09f8092 100644 --- a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -215,13 +215,6 @@ impl AggregateUDFImpl for Avg { &self.aliases } - fn create_sliding_accumulator( - &self, - args: AccumulatorArgs, - ) -> Result> { - self.accumulator(args) - } - fn reverse_expr(&self) -> ReversedUDAF { ReversedUDAF::Identical } diff --git a/datafusion/functions-aggregate/src/bool_and_or.rs b/datafusion/functions-aggregate/src/bool_and_or.rs index d0028672743ed..b91fbb9ff7095 100644 --- a/datafusion/functions-aggregate/src/bool_and_or.rs +++ b/datafusion/functions-aggregate/src/bool_and_or.rs @@ -165,13 +165,6 @@ impl AggregateUDFImpl for BoolAnd { &[] } - fn create_sliding_accumulator( - &self, - _: AccumulatorArgs, - ) -> Result> { - Ok(Box::::default()) - } - fn order_sensitivity(&self) -> AggregateOrderSensitivity { AggregateOrderSensitivity::Insensitive } @@ -292,13 +285,6 @@ impl AggregateUDFImpl for BoolOr { &[] } - fn create_sliding_accumulator( - &self, - _: AccumulatorArgs, - ) -> Result> { - Ok(Box::::default()) - } - fn order_sensitivity(&self) -> AggregateOrderSensitivity { AggregateOrderSensitivity::Insensitive } diff --git a/datafusion/functions-aggregate/src/regr.rs b/datafusion/functions-aggregate/src/regr.rs index aad110a13e136..390a769aca7f8 100644 --- a/datafusion/functions-aggregate/src/regr.rs +++ b/datafusion/functions-aggregate/src/regr.rs @@ -164,13 +164,6 @@ impl AggregateUDFImpl for Regr { Ok(Box::new(RegrAccumulator::try_new(&self.regr_type)?)) } - fn create_sliding_accumulator( - &self, - _args: AccumulatorArgs, - ) -> Result> { - Ok(Box::new(RegrAccumulator::try_new(&self.regr_type)?)) - } - fn state_fields(&self, args: StateFieldsArgs) -> Result> { Ok(vec![ Field::new( From f19d30d5261e3a525e2898a8f08f44744820e605 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:07:46 -0400 Subject: [PATCH 219/357] chore(deps): update rstest requirement from 0.21.0 to 0.22.0 (#11811) Updates the requirements on [rstest](https://github.com/la10736/rstest) to permit the latest version. - [Release notes](https://github.com/la10736/rstest/releases) - [Changelog](https://github.com/la10736/rstest/blob/master/CHANGELOG.md) - [Commits](https://github.com/la10736/rstest/compare/v0.21.0...v0.21.0) --- updated-dependencies: - dependency-name: rstest dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 90aff3f715cab..38b5627bc1872 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,7 +125,7 @@ parquet = { version = "52.2.0", default-features = false, features = [ ] } rand = "0.8" regex = "1.8" -rstest = "0.21.0" +rstest = "0.22.0" serde_json = "1" sqlparser = { version = "0.49", features = ["visitor"] } tempfile = "3" From 0417e543e1b0de61f9eb48c9c1dd4af943faa394 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 12:00:58 -0400 Subject: [PATCH 220/357] Minor: Update exected output due to logical conflict (#11824) --- datafusion/sqllogictest/test_files/aggregate.slt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 9625f02afbb48..8a5222143356f 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4530,21 +4530,21 @@ EXPLAIN SELECT DISTINCT c3, min(c1) FROM aggregate_test_100 group by c3 limit 5; ---- logical_plan 01)Limit: skip=0, fetch=5 -02)--Aggregate: groupBy=[[aggregate_test_100.c3, MIN(aggregate_test_100.c1)]], aggr=[[]] -03)----Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[MIN(aggregate_test_100.c1)]] +02)--Aggregate: groupBy=[[aggregate_test_100.c3, min(aggregate_test_100.c1)]], aggr=[[]] +03)----Aggregate: groupBy=[[aggregate_test_100.c3]], aggr=[[min(aggregate_test_100.c1)]] 04)------TableScan: aggregate_test_100 projection=[c1, c3] physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--CoalescePartitionsExec 03)----LocalLimitExec: fetch=5 -04)------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3, MIN(aggregate_test_100.c1)@1 as MIN(aggregate_test_100.c1)], aggr=[], lim=[5] +04)------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] 05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([c3@0, MIN(aggregate_test_100.c1)@1], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[c3@0 as c3, MIN(aggregate_test_100.c1)@1 as MIN(aggregate_test_100.c1)], aggr=[], lim=[5] -08)--------------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3], aggr=[MIN(aggregate_test_100.c1)] +06)----------RepartitionExec: partitioning=Hash([c3@0, min(aggregate_test_100.c1)@1], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] +08)--------------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3], aggr=[min(aggregate_test_100.c1)] 09)----------------CoalesceBatchesExec: target_batch_size=8192 10)------------------RepartitionExec: partitioning=Hash([c3@0], 4), input_partitions=4 -11)--------------------AggregateExec: mode=Partial, gby=[c3@1 as c3], aggr=[MIN(aggregate_test_100.c1)] +11)--------------------AggregateExec: mode=Partial, gby=[c3@1 as c3], aggr=[min(aggregate_test_100.c1)] 12)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true From f56a2ef28f669351d5faec23ca0a6edef5c53ee6 Mon Sep 17 00:00:00 
2001 From: Alex Huang Date: Tue, 6 Aug 2024 00:21:04 +0800 Subject: [PATCH 221/357] doc: Add support for `map` and `make_map` functions (#11799) * doc: Add support for `map` and `make_map` functions * chore: Add example for MAP --- .../source/user-guide/sql/scalar_functions.md | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 561824772af8c..c7490df04983e 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3636,6 +3636,70 @@ Unwraps struct fields into columns. +-----------------------+-----------------------+ ``` +## Map Functions + +- [map](#map) +- [make_map](#make_map) + +### `map` + +Returns an Arrow map with the specified key-value pairs. + +``` +map(key, value) +map(key: value) +``` + +#### Arguments + +- **key**: Expression to be used for key. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. +- **value**: Expression to be used for value. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. + +#### Example + +``` +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} +``` + +### `make_map` + +Returns an Arrow map with the specified key-value pairs. + +``` +make_map(key_1, value_1, ..., key_n, value_n) +``` + +#### Arguments + +- **key_n**: Expression to be used for key. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. +- **value_n**: Expression to be used for value. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. 
+ +#### Example + +``` +SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); +---- +{POST: 41, HEAD: 33, PATCH: } +``` + ## Hashing Functions - [digest](#digest) From 011a3f3e32e140190486d51cd98494de29be9ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Tue, 6 Aug 2024 02:31:36 +0800 Subject: [PATCH 222/357] Improve readme page in crates.io (#11809) --- datafusion/core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 0415c3164b38c..0714c3e94a85a 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" keywords = ["arrow", "query", "sql"] include = ["benches/*.rs", "src/**/*.rs", "Cargo.toml"] -readme = "README.md" +readme = "../../README.md" version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } From c6f0d3cac93ef1436313160f1dba878745d693bb Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Mon, 5 Aug 2024 20:32:15 +0200 Subject: [PATCH 223/357] Pass scalar to `eq` inside `nullif` (#11697) * Properly specialize nullif for scalar (3x faster) * missed feature flag * fix test * extract * dodes -> does Co-authored-by: Oleks V --------- Co-authored-by: Oleks V --- datafusion/functions/Cargo.toml | 5 +++ datafusion/functions/benches/nullif.rs | 42 +++++++++++++++++++++++++ datafusion/functions/src/core/nullif.rs | 10 ++++-- 3 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 datafusion/functions/benches/nullif.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 0281676cabf2d..9675d03a01617 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -112,6 +112,11 @@ harness = false name = "make_date" required-features = ["datetime_expressions"] +[[bench]] +harness = false +name = "nullif" +required-features = ["core_expressions"] + [[bench]] harness = false name = "date_bin" diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs new file mode 100644 index 0000000000000..dfabad335835f --- /dev/null +++ b/datafusion/functions/benches/nullif.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use arrow::util::bench_util::create_string_array_with_len; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; +use datafusion_functions::core::nullif; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let nullif = nullif(); + for size in [1024, 4096, 8192] { + let array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); + let args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some("abcd".to_string()))), + ColumnarValue::Array(array), + ]; + c.bench_function(&format!("nullif scalar array: {}", size), |b| { + b.iter(|| black_box(nullif.invoke(&args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index e8bf2db514c37..6fcfbd36416ef 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -19,7 +19,6 @@ use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; -use arrow::array::Array; use arrow::compute::kernels::cmp::eq; use arrow::compute::kernels::nullif::nullif; use datafusion_common::ScalarValue; @@ -122,8 +121,13 @@ fn nullif_func(args: &[ColumnarValue]) -> Result { Ok(ColumnarValue::Array(array)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { - let lhs = lhs.to_array_of_size(rhs.len())?; - let array = nullif(&lhs, &eq(&lhs, &rhs)?)?; + let lhs_s = lhs.to_scalar()?; + let lhs_a = lhs.to_array_of_size(rhs.len())?; + let array = nullif( + // nullif in arrow-select does not support Datum, so we need to convert to array + lhs_a.as_ref(), + &eq(&lhs_s, &rhs)?, + )?; Ok(ColumnarValue::Array(array)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => { From fcd907d78e32976b4f4dd93db5fb87a385630acd Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Tue, 6 Aug 2024 04:16:23 +0800 Subject: [PATCH 224/357] refactor: move `aggregate_statistics` to `datafusion-physical-optimizer` (#11798) * refactor: move aggregate statistics to datafusion-physical-optimizer * chore * chore: Update cargo lock * refactor: Move COUNT_STAR_EXPANSION to datafusion_common * refactor: Move tests to core/tests --- datafusion-cli/Cargo.lock | 100 +-- datafusion/common/src/utils/expr.rs | 24 + datafusion/common/src/utils/mod.rs | 1 + .../aggregate_statistics.rs | 657 ------------------ .../limited_distinct_aggregation.rs | 2 +- datafusion/core/src/physical_optimizer/mod.rs | 1 - datafusion/core/src/test_util/mod.rs | 61 +- .../tests/physical_optimizer_integration.rs | 325 +++++++++ datafusion/expr/src/utils.rs | 4 +- .../src/aggregate_statistics.rs | 298 ++++++++ datafusion/physical-optimizer/src/lib.rs | 1 + 11 files changed, 770 insertions(+), 704 deletions(-) create mode 100644 datafusion/common/src/utils/expr.rs delete mode 100644 datafusion/core/src/physical_optimizer/aggregate_statistics.rs create mode 100644 datafusion/core/tests/physical_optimizer_integration.rs create mode 100644 datafusion/physical-optimizer/src/aggregate_statistics.rs diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 5884e424c781c..9d20c242bbef9 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -272,7 +272,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.2.6", + "indexmap 2.3.0", "lexical-core", "num", "serde", @@ -375,7 +375,7 @@ dependencies = [ "tokio", 
"xz2", "zstd 0.13.2", - "zstd-safe 7.2.0", + "zstd-safe 7.2.1", ] [[package]] @@ -837,9 +837,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.1" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "bytes-utils" @@ -874,9 +874,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" dependencies = [ "jobserver", "libc", @@ -1161,7 +1161,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.2.6", + "indexmap 2.3.0", "itertools 0.12.1", "log", "num-traits", @@ -1357,7 +1357,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.2.6", + "indexmap 2.3.0", "itertools 0.12.1", "log", "paste", @@ -1384,7 +1384,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.2.6", + "indexmap 2.3.0", "itertools 0.12.1", "log", "paste", @@ -1436,7 +1436,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.2.6", + "indexmap 2.3.0", "itertools 0.12.1", "log", "once_cell", @@ -1629,9 +1629,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" dependencies = [ "crc32fast", "miniz_oxide", @@ -1801,7 +1801,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.2.6", + "indexmap 2.3.0", "slab", "tokio", "tokio-util", @@ -1820,7 +1820,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.2.6", + "indexmap 2.3.0", "slab", "tokio", "tokio-util", @@ -2112,9 +2112,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -2552,7 +2552,7 @@ dependencies = [ "rand", "reqwest", "ring 0.17.8", - "rustls-pemfile 2.1.2", + "rustls-pemfile 2.1.3", "serde", "serde_json", "snafu", @@ -2682,7 +2682,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.2.6", + "indexmap 2.3.0", ] [[package]] @@ -2769,9 +2769,12 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "predicates" @@ -2854,9 +2857,9 @@ dependencies = [ [[package]] name = 
"quinn" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +checksum = "b22d8e7369034b9a7132bc2008cac12f2013c8132b45e0554e6e20e2617f2156" dependencies = [ "bytes", "pin-project-lite", @@ -2864,6 +2867,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.12", + "socket2", "thiserror", "tokio", "tracing", @@ -2871,9 +2875,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.3" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" dependencies = [ "bytes", "rand", @@ -2895,6 +2899,7 @@ dependencies = [ "libc", "once_cell", "socket2", + "tracing", "windows-sys 0.52.0", ] @@ -2969,9 +2974,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -3029,7 +3034,7 @@ dependencies = [ "quinn", "rustls 0.23.12", "rustls-native-certs 0.7.1", - "rustls-pemfile 2.1.2", + "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", "serde_json", @@ -3117,9 +3122,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "1.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc_version" @@ -3188,7 +3193,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" dependencies = [ "openssl-probe", - "rustls-pemfile 2.1.2", + "rustls-pemfile 2.1.3", "rustls-pki-types", "schannel", "security-framework", @@ -3205,9 +3210,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425" dependencies = [ "base64 0.22.1", "rustls-pki-types", @@ -3356,9 +3361,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.121" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", "memchr", @@ -3585,12 +3590,13 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53" dependencies = [ "cfg-if", "fastrand 2.1.0", + "once_cell", "rustix", "windows-sys 0.52.0", ] @@ -4119,11 +4125,11 
@@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4159,6 +4165,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -4311,6 +4326,7 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] @@ -4346,7 +4362,7 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe 7.2.0", + "zstd-safe 7.2.1", ] [[package]] @@ -4361,9 +4377,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] diff --git a/datafusion/common/src/utils/expr.rs b/datafusion/common/src/utils/expr.rs new file mode 100644 index 0000000000000..0fe4546b85382 --- /dev/null +++ b/datafusion/common/src/utils/expr.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Expression utilities + +use crate::ScalarValue; + +/// The value to which `COUNT(*)` is expanded to in +/// `COUNT()` expressions +pub const COUNT_STAR_EXPANSION: ScalarValue = ScalarValue::Int64(Some(1)); diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 8b025255f5df7..58dc8f40b5773 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -17,6 +17,7 @@ //! This module provides the bisect function, which implements binary search. 
+pub mod expr; pub mod memory; pub mod proxy; diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs deleted file mode 100644 index a0f6f6a65b1f7..0000000000000 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ /dev/null @@ -1,657 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utilizing exact statistics from sources to avoid scanning data -use std::sync::Arc; - -use crate::config::ConfigOptions; -use crate::error::Result; -use crate::physical_plan::aggregates::AggregateExec; -use crate::physical_plan::projection::ProjectionExec; -use crate::physical_plan::{expressions, AggregateExpr, ExecutionPlan, Statistics}; -use crate::scalar::ScalarValue; - -use datafusion_common::stats::Precision; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_expr::utils::COUNT_STAR_EXPANSION; -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::udaf::AggregateFunctionExpr; - -/// Optimizer that uses available statistics for aggregate functions -#[derive(Default)] -pub struct AggregateStatistics {} - -impl AggregateStatistics { - #[allow(missing_docs)] - pub fn new() -> Self { - Self {} - } -} - -impl PhysicalOptimizerRule for AggregateStatistics { - fn optimize( - &self, - plan: Arc, - _config: &ConfigOptions, - ) -> Result> { - if let Some(partial_agg_exec) = take_optimizable(&*plan) { - let partial_agg_exec = partial_agg_exec - .as_any() - .downcast_ref::() - .expect("take_optimizable() ensures that this is a AggregateExec"); - let stats = partial_agg_exec.input().statistics()?; - let mut projections = vec![]; - for expr in partial_agg_exec.aggr_expr() { - if let Some((non_null_rows, name)) = - take_optimizable_column_and_table_count(&**expr, &stats) - { - projections.push((expressions::lit(non_null_rows), name.to_owned())); - } else if let Some((min, name)) = take_optimizable_min(&**expr, &stats) { - projections.push((expressions::lit(min), name.to_owned())); - } else if let Some((max, name)) = take_optimizable_max(&**expr, &stats) { - projections.push((expressions::lit(max), name.to_owned())); - } else { - // TODO: we need all aggr_expr to be resolved (cf TODO fullres) - break; - } - } - - // TODO fullres: use statistics even if not all aggr_expr could be resolved - if projections.len() == partial_agg_exec.aggr_expr().len() { - // input can be entirely removed - Ok(Arc::new(ProjectionExec::try_new( - projections, - Arc::new(PlaceholderRowExec::new(plan.schema())), - )?)) - } else { - plan.map_children(|child| { - self.optimize(child, _config).map(Transformed::yes) - }) - .data() - } - } 
else { - plan.map_children(|child| self.optimize(child, _config).map(Transformed::yes)) - .data() - } - } - - fn name(&self) -> &str { - "aggregate_statistics" - } - - /// This rule will change the nullable properties of the schema, disable the schema check. - fn schema_check(&self) -> bool { - false - } -} - -/// assert if the node passed as argument is a final `AggregateExec` node that can be optimized: -/// - its child (with possible intermediate layers) is a partial `AggregateExec` node -/// - they both have no grouping expression -/// -/// If this is the case, return a ref to the partial `AggregateExec`, else `None`. -/// We would have preferred to return a casted ref to AggregateExec but the recursion requires -/// the `ExecutionPlan.children()` method that returns an owned reference. -fn take_optimizable(node: &dyn ExecutionPlan) -> Option> { - if let Some(final_agg_exec) = node.as_any().downcast_ref::() { - if !final_agg_exec.mode().is_first_stage() - && final_agg_exec.group_expr().is_empty() - { - let mut child = Arc::clone(final_agg_exec.input()); - loop { - if let Some(partial_agg_exec) = - child.as_any().downcast_ref::() - { - if partial_agg_exec.mode().is_first_stage() - && partial_agg_exec.group_expr().is_empty() - && partial_agg_exec.filter_expr().iter().all(|e| e.is_none()) - { - return Some(child); - } - } - if let [childrens_child] = child.children().as_slice() { - child = Arc::clone(childrens_child); - } else { - break; - } - } - } - } - None -} - -/// If this agg_expr is a count that can be exactly derived from the statistics, return it. -fn take_optimizable_column_and_table_count( - agg_expr: &dyn AggregateExpr, - stats: &Statistics, -) -> Option<(ScalarValue, String)> { - let col_stats = &stats.column_statistics; - if is_non_distinct_count(agg_expr) { - if let Precision::Exact(num_rows) = stats.num_rows { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - let current_val = &col_stats[col_expr.index()].null_count; - if let &Precision::Exact(val) = current_val { - return Some(( - ScalarValue::Int64(Some((num_rows - val) as i64)), - agg_expr.name().to_string(), - )); - } - } else if let Some(lit_expr) = - exprs[0].as_any().downcast_ref::() - { - if lit_expr.value() == &COUNT_STAR_EXPANSION { - return Some(( - ScalarValue::Int64(Some(num_rows as i64)), - agg_expr.name().to_string(), - )); - } - } - } - } - } - None -} - -/// If this agg_expr is a min that is exactly defined in the statistics, return it. 
-fn take_optimizable_min( - agg_expr: &dyn AggregateExpr, - stats: &Statistics, -) -> Option<(ScalarValue, String)> { - if let Precision::Exact(num_rows) = &stats.num_rows { - match *num_rows { - 0 => { - // MIN/MAX with 0 rows is always null - if is_min(agg_expr) { - if let Ok(min_data_type) = - ScalarValue::try_from(agg_expr.field().unwrap().data_type()) - { - return Some((min_data_type, agg_expr.name().to_string())); - } - } - } - value if value > 0 => { - let col_stats = &stats.column_statistics; - if is_min(agg_expr) { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].min_value - { - if !val.is_null() { - return Some(( - val.clone(), - agg_expr.name().to_string(), - )); - } - } - } - } - } - } - _ => {} - } - } - None -} - -/// If this agg_expr is a max that is exactly defined in the statistics, return it. -fn take_optimizable_max( - agg_expr: &dyn AggregateExpr, - stats: &Statistics, -) -> Option<(ScalarValue, String)> { - if let Precision::Exact(num_rows) = &stats.num_rows { - match *num_rows { - 0 => { - // MIN/MAX with 0 rows is always null - if is_max(agg_expr) { - if let Ok(max_data_type) = - ScalarValue::try_from(agg_expr.field().unwrap().data_type()) - { - return Some((max_data_type, agg_expr.name().to_string())); - } - } - } - value if value > 0 => { - let col_stats = &stats.column_statistics; - if is_max(agg_expr) { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].max_value - { - if !val.is_null() { - return Some(( - val.clone(), - agg_expr.name().to_string(), - )); - } - } - } - } - } - } - _ => {} - } - } - None -} - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { - return true; - } - } - false -} - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_min(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "min" { - return true; - } - } - false -} - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_max(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "max" { - return true; - } - } - false -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - - use crate::logical_expr::Operator; - use crate::physical_plan::aggregates::PhysicalGroupBy; - use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::common; - use crate::physical_plan::filter::FilterExec; - use crate::physical_plan::memory::MemoryExec; - use crate::prelude::SessionContext; - - use arrow::array::Int32Array; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; - use datafusion_common::cast::as_int64_array; - use datafusion_functions_aggregate::count::count_udaf; - use 
datafusion_physical_expr::expressions::cast; - use datafusion_physical_expr::PhysicalExpr; - use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; - use datafusion_physical_plan::aggregates::AggregateMode; - - /// Mock data using a MemoryExec which has an exact count statistic - fn mock_data() -> Result> { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - ])); - - let batch = RecordBatch::try_new( - Arc::clone(&schema), - vec![ - Arc::new(Int32Array::from(vec![Some(1), Some(2), None])), - Arc::new(Int32Array::from(vec![Some(4), None, Some(6)])), - ], - )?; - - Ok(Arc::new(MemoryExec::try_new( - &[vec![batch]], - Arc::clone(&schema), - None, - )?)) - } - - /// Checks that the count optimization was applied and we still get the right result - async fn assert_count_optim_success( - plan: AggregateExec, - agg: TestAggregate, - ) -> Result<()> { - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let plan: Arc = Arc::new(plan); - - let optimized = AggregateStatistics::new() - .optimize(Arc::clone(&plan), state.config_options())?; - - // A ProjectionExec is a sign that the count optimization was applied - assert!(optimized.as_any().is::()); - - // run both the optimized and nonoptimized plan - let optimized_result = - common::collect(optimized.execute(0, session_ctx.task_ctx())?).await?; - let nonoptimized_result = - common::collect(plan.execute(0, session_ctx.task_ctx())?).await?; - assert_eq!(optimized_result.len(), nonoptimized_result.len()); - - // and validate the results are the same and expected - assert_eq!(optimized_result.len(), 1); - check_batch(optimized_result.into_iter().next().unwrap(), &agg); - // check the non optimized one too to ensure types and names remain the same - assert_eq!(nonoptimized_result.len(), 1); - check_batch(nonoptimized_result.into_iter().next().unwrap(), &agg); - - Ok(()) - } - - fn check_batch(batch: RecordBatch, agg: &TestAggregate) { - let schema = batch.schema(); - let fields = schema.fields(); - assert_eq!(fields.len(), 1); - - let field = &fields[0]; - assert_eq!(field.name(), agg.column_name()); - assert_eq!(field.data_type(), &DataType::Int64); - // note that nullabiolity differs - - assert_eq!( - as_int64_array(batch.column(0)).unwrap().values(), - &[agg.expected_count()] - ); - } - - /// Describe the type of aggregate being tested - pub(crate) enum TestAggregate { - /// Testing COUNT(*) type aggregates - CountStar, - - /// Testing for COUNT(column) aggregate - ColumnA(Arc), - } - - impl TestAggregate { - pub(crate) fn new_count_star() -> Self { - Self::CountStar - } - - fn new_count_column(schema: &Arc) -> Self { - Self::ColumnA(schema.clone()) - } - - // Return appropriate expr depending if COUNT is for col or table (*) - pub(crate) fn count_expr(&self, schema: &Schema) -> Arc { - AggregateExprBuilder::new(count_udaf(), vec![self.column()]) - .schema(Arc::new(schema.clone())) - .name(self.column_name()) - .build() - .unwrap() - } - - /// what argument would this aggregate need in the plan? - fn column(&self) -> Arc { - match self { - Self::CountStar => expressions::lit(COUNT_STAR_EXPANSION), - Self::ColumnA(s) => expressions::col("a", s).unwrap(), - } - } - - /// What name would this aggregate produce in a plan? - fn column_name(&self) -> &'static str { - match self { - Self::CountStar => "COUNT(*)", - Self::ColumnA(_) => "COUNT(a)", - } - } - - /// What is the expected count? 
- fn expected_count(&self) -> i64 { - match self { - TestAggregate::CountStar => 3, - TestAggregate::ColumnA(_) => 2, - } - } - } - - #[tokio::test] - async fn test_count_partial_direct_child() -> Result<()> { - // basic test case with the aggregation applied on a source with exact statistics - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_with_nulls_direct_child() -> Result<()> { - // basic test case with the aggregation applied on a source with exact statistics - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_indirect_child() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - // We introduce an intermediate optimization step between the partial and final aggregtator - let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(coalesce), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_with_nulls_indirect_child() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - // We introduce an intermediate optimization step between the partial and final aggregtator - let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(coalesce), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_inexact_stat() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - // adding a filter makes the statistics inexact - let filter = Arc::new(FilterExec::try_new( - expressions::binary( - 
expressions::col("a", &schema)?, - Operator::Gt, - cast(expressions::lit(1u32), &schema, DataType::Int32)?, - &schema, - )?, - source, - )?); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - filter, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - let conf = ConfigOptions::new(); - let optimized = - AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; - - // check that the original ExecutionPlan was not replaced - assert!(optimized.as_any().is::()); - - Ok(()) - } - - #[tokio::test] - async fn test_count_with_nulls_inexact_stat() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - // adding a filter makes the statistics inexact - let filter = Arc::new(FilterExec::try_new( - expressions::binary( - expressions::col("a", &schema)?, - Operator::Gt, - cast(expressions::lit(1u32), &schema, DataType::Int32)?, - &schema, - )?, - source, - )?); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - filter, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - let conf = ConfigOptions::new(); - let optimized = - AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; - - // check that the original ExecutionPlan was not replaced - assert!(optimized.as_any().is::()); - - Ok(()) - } -} diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs index b5d3f432d84d0..b181ad9051edd 100644 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs @@ -193,7 +193,6 @@ impl PhysicalOptimizerRule for LimitedDistinctAggregation { mod tests { use super::*; - use crate::physical_optimizer::aggregate_statistics::tests::TestAggregate; use crate::physical_optimizer::enforce_distribution::tests::{ parquet_exec_with_sort, schema, trim_plan_display, }; @@ -201,6 +200,7 @@ mod tests { use crate::physical_plan::collect; use crate::physical_plan::memory::MemoryExec; use crate::prelude::SessionContext; + use crate::test_util::TestAggregate; use arrow::array::Int32Array; use arrow::compute::SortOptions; diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 01ddab3ec97de..9291d0b848653 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -21,7 +21,6 @@ //! "Repartition" or "Sortedness" //! //! 
[`ExecutionPlan`]: crate::physical_plan::ExecutionPlan -pub mod aggregate_statistics; pub mod coalesce_batches; pub mod combine_partial_final_agg; pub mod enforce_distribution; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 042febf32fd19..6eb82dece31ce 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -45,11 +45,16 @@ use crate::prelude::{CsvReadOptions, SessionContext}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::TableReference; +use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{CreateExternalTable, Expr, TableType}; -use datafusion_physical_expr::EquivalenceProperties; +use datafusion_functions_aggregate::count::count_udaf; +use datafusion_physical_expr::{ + expressions, AggregateExpr, EquivalenceProperties, PhysicalExpr, +}; use async_trait::async_trait; use datafusion_catalog::Session; +use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; use futures::Stream; use tempfile::TempDir; // backwards compatibility @@ -402,3 +407,57 @@ pub fn bounded_stream(batch: RecordBatch, limit: usize) -> SendableRecordBatchSt batch, }) } + +/// Describe the type of aggregate being tested +pub enum TestAggregate { + /// Testing COUNT(*) type aggregates + CountStar, + + /// Testing for COUNT(column) aggregate + ColumnA(Arc), +} + +impl TestAggregate { + /// Create a new COUNT(*) aggregate + pub fn new_count_star() -> Self { + Self::CountStar + } + + /// Create a new COUNT(column) aggregate + pub fn new_count_column(schema: &Arc) -> Self { + Self::ColumnA(schema.clone()) + } + + /// Return appropriate expr depending if COUNT is for col or table (*) + pub fn count_expr(&self, schema: &Schema) -> Arc { + AggregateExprBuilder::new(count_udaf(), vec![self.column()]) + .schema(Arc::new(schema.clone())) + .name(self.column_name()) + .build() + .unwrap() + } + + /// what argument would this aggregate need in the plan? + fn column(&self) -> Arc { + match self { + Self::CountStar => expressions::lit(COUNT_STAR_EXPANSION), + Self::ColumnA(s) => expressions::col("a", s).unwrap(), + } + } + + /// What name would this aggregate produce in a plan? + pub fn column_name(&self) -> &'static str { + match self { + Self::CountStar => "COUNT(*)", + Self::ColumnA(_) => "COUNT(a)", + } + } + + /// What is the expected count? + pub fn expected_count(&self) -> i64 { + match self { + TestAggregate::CountStar => 3, + TestAggregate::ColumnA(_) => 2, + } + } +} diff --git a/datafusion/core/tests/physical_optimizer_integration.rs b/datafusion/core/tests/physical_optimizer_integration.rs new file mode 100644 index 0000000000000..bbf4dcd2b799d --- /dev/null +++ b/datafusion/core/tests/physical_optimizer_integration.rs @@ -0,0 +1,325 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for the physical optimizer + +use datafusion_common::config::ConfigOptions; +use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::aggregates::AggregateExec; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::ExecutionPlan; +use std::sync::Arc; + +use datafusion::error::Result; +use datafusion::logical_expr::Operator; +use datafusion::prelude::SessionContext; +use datafusion::test_util::TestAggregate; +use datafusion_physical_plan::aggregates::PhysicalGroupBy; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::common; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::memory::MemoryExec; + +use arrow::array::Int32Array; +use arrow::datatypes::{DataType, Field, Schema}; +use arrow::record_batch::RecordBatch; +use datafusion_common::cast::as_int64_array; +use datafusion_physical_expr::expressions::{self, cast}; +use datafusion_physical_plan::aggregates::AggregateMode; + +/// Mock data using a MemoryExec which has an exact count statistic +fn mock_data() -> Result> { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![Some(1), Some(2), None])), + Arc::new(Int32Array::from(vec![Some(4), None, Some(6)])), + ], + )?; + + Ok(Arc::new(MemoryExec::try_new( + &[vec![batch]], + Arc::clone(&schema), + None, + )?)) +} + +/// Checks that the count optimization was applied and we still get the right result +async fn assert_count_optim_success( + plan: AggregateExec, + agg: TestAggregate, +) -> Result<()> { + let session_ctx = SessionContext::new(); + let state = session_ctx.state(); + let plan: Arc = Arc::new(plan); + + let optimized = + AggregateStatistics::new().optimize(Arc::clone(&plan), state.config_options())?; + + // A ProjectionExec is a sign that the count optimization was applied + assert!(optimized.as_any().is::()); + + // run both the optimized and nonoptimized plan + let optimized_result = + common::collect(optimized.execute(0, session_ctx.task_ctx())?).await?; + let nonoptimized_result = + common::collect(plan.execute(0, session_ctx.task_ctx())?).await?; + assert_eq!(optimized_result.len(), nonoptimized_result.len()); + + // and validate the results are the same and expected + assert_eq!(optimized_result.len(), 1); + check_batch(optimized_result.into_iter().next().unwrap(), &agg); + // check the non optimized one too to ensure types and names remain the same + assert_eq!(nonoptimized_result.len(), 1); + check_batch(nonoptimized_result.into_iter().next().unwrap(), &agg); + + Ok(()) +} + +fn check_batch(batch: RecordBatch, agg: &TestAggregate) { + let schema = batch.schema(); + let fields = schema.fields(); + assert_eq!(fields.len(), 1); + + let field = &fields[0]; + assert_eq!(field.name(), agg.column_name()); + assert_eq!(field.data_type(), &DataType::Int64); + // note that nullabiolity differs + + assert_eq!( + as_int64_array(batch.column(0)).unwrap().values(), + &[agg.expected_count()] + ); +} + +#[tokio::test] +async fn test_count_partial_direct_child() -> Result<()> { + // basic test case with the aggregation applied on a source with exact statistics + 
let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_star(); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + source, + Arc::clone(&schema), + )?; + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(partial_agg), + Arc::clone(&schema), + )?; + + assert_count_optim_success(final_agg, agg).await?; + + Ok(()) +} + +#[tokio::test] +async fn test_count_partial_with_nulls_direct_child() -> Result<()> { + // basic test case with the aggregation applied on a source with exact statistics + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_column(&schema); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + source, + Arc::clone(&schema), + )?; + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(partial_agg), + Arc::clone(&schema), + )?; + + assert_count_optim_success(final_agg, agg).await?; + + Ok(()) +} + +#[tokio::test] +async fn test_count_partial_indirect_child() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_star(); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + source, + Arc::clone(&schema), + )?; + + // We introduce an intermediate optimization step between the partial and final aggregtator + let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(coalesce), + Arc::clone(&schema), + )?; + + assert_count_optim_success(final_agg, agg).await?; + + Ok(()) +} + +#[tokio::test] +async fn test_count_partial_with_nulls_indirect_child() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_column(&schema); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + source, + Arc::clone(&schema), + )?; + + // We introduce an intermediate optimization step between the partial and final aggregtator + let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(coalesce), + Arc::clone(&schema), + )?; + + assert_count_optim_success(final_agg, agg).await?; + + Ok(()) +} + +#[tokio::test] +async fn test_count_inexact_stat() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_star(); + + // adding a filter makes the statistics inexact + let filter = Arc::new(FilterExec::try_new( + expressions::binary( + expressions::col("a", &schema)?, + Operator::Gt, + cast(expressions::lit(1u32), &schema, DataType::Int32)?, + &schema, + )?, + source, + )?); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + filter, + Arc::clone(&schema), 
+ )?; + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(partial_agg), + Arc::clone(&schema), + )?; + + let conf = ConfigOptions::new(); + let optimized = AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; + + // check that the original ExecutionPlan was not replaced + assert!(optimized.as_any().is::()); + + Ok(()) +} + +#[tokio::test] +async fn test_count_with_nulls_inexact_stat() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_column(&schema); + + // adding a filter makes the statistics inexact + let filter = Arc::new(FilterExec::try_new( + expressions::binary( + expressions::col("a", &schema)?, + Operator::Gt, + cast(expressions::lit(1u32), &schema, DataType::Int32)?, + &schema, + )?, + source, + )?); + + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + filter, + Arc::clone(&schema), + )?; + + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::default(), + vec![agg.count_expr(&schema)], + vec![None], + Arc::new(partial_agg), + Arc::clone(&schema), + )?; + + let conf = ConfigOptions::new(); + let optimized = AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; + + // check that the original ExecutionPlan was not replaced + assert!(optimized.as_any().is::()); + + Ok(()) +} diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 683a8e170ed4d..65a70b6732668 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -35,14 +35,14 @@ use datafusion_common::tree_node::{ use datafusion_common::utils::get_at_indices; use datafusion_common::{ internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, Result, - ScalarValue, TableReference, + TableReference, }; use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions}; /// The value to which `COUNT(*)` is expanded to in /// `COUNT()` expressions -pub const COUNT_STAR_EXPANSION: ScalarValue = ScalarValue::Int64(Some(1)); +pub use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; /// Recursively walk a list of expression trees, collecting the unique set of columns /// referenced in the expression diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs new file mode 100644 index 0000000000000..0ce92df393aac --- /dev/null +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -0,0 +1,298 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Utilizing exact statistics from sources to avoid scanning data +use std::sync::Arc; + +use datafusion_common::config::ConfigOptions; +use datafusion_common::scalar::ScalarValue; +use datafusion_common::Result; +use datafusion_physical_plan::aggregates::AggregateExec; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::{expressions, AggregateExpr, ExecutionPlan, Statistics}; + +use crate::PhysicalOptimizerRule; +use datafusion_common::stats::Precision; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; +use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; +use datafusion_physical_plan::udaf::AggregateFunctionExpr; + +/// Optimizer that uses available statistics for aggregate functions +#[derive(Default)] +pub struct AggregateStatistics {} + +impl AggregateStatistics { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl PhysicalOptimizerRule for AggregateStatistics { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result> { + if let Some(partial_agg_exec) = take_optimizable(&*plan) { + let partial_agg_exec = partial_agg_exec + .as_any() + .downcast_ref::() + .expect("take_optimizable() ensures that this is a AggregateExec"); + let stats = partial_agg_exec.input().statistics()?; + let mut projections = vec![]; + for expr in partial_agg_exec.aggr_expr() { + if let Some((non_null_rows, name)) = + take_optimizable_column_and_table_count(&**expr, &stats) + { + projections.push((expressions::lit(non_null_rows), name.to_owned())); + } else if let Some((min, name)) = take_optimizable_min(&**expr, &stats) { + projections.push((expressions::lit(min), name.to_owned())); + } else if let Some((max, name)) = take_optimizable_max(&**expr, &stats) { + projections.push((expressions::lit(max), name.to_owned())); + } else { + // TODO: we need all aggr_expr to be resolved (cf TODO fullres) + break; + } + } + + // TODO fullres: use statistics even if not all aggr_expr could be resolved + if projections.len() == partial_agg_exec.aggr_expr().len() { + // input can be entirely removed + Ok(Arc::new(ProjectionExec::try_new( + projections, + Arc::new(PlaceholderRowExec::new(plan.schema())), + )?)) + } else { + plan.map_children(|child| { + self.optimize(child, _config).map(Transformed::yes) + }) + .data() + } + } else { + plan.map_children(|child| self.optimize(child, _config).map(Transformed::yes)) + .data() + } + } + + fn name(&self) -> &str { + "aggregate_statistics" + } + + /// This rule will change the nullable properties of the schema, disable the schema check. + fn schema_check(&self) -> bool { + false + } +} + +/// assert if the node passed as argument is a final `AggregateExec` node that can be optimized: +/// - its child (with possible intermediate layers) is a partial `AggregateExec` node +/// - they both have no grouping expression +/// +/// If this is the case, return a ref to the partial `AggregateExec`, else `None`. +/// We would have preferred to return a casted ref to AggregateExec but the recursion requires +/// the `ExecutionPlan.children()` method that returns an owned reference. 
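// Sketch of the plan shape this helper accepts (single-child operators such as
// `CoalescePartitionsExec` may sit between the two aggregates):
//
//   AggregateExec (mode=Final,   group_expr=[])          <- `node`
//     ... zero or more single-child operators ...
//       AggregateExec (mode=Partial, group_expr=[],      <- returned `Arc`
//                      all filter_expr == None)
//
// Grouping expressions, filters on the partial aggregate, or a multi-child
// operator in the chain all cause `None` to be returned, leaving the plan
// untouched.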
+fn take_optimizable(node: &dyn ExecutionPlan) -> Option> { + if let Some(final_agg_exec) = node.as_any().downcast_ref::() { + if !final_agg_exec.mode().is_first_stage() + && final_agg_exec.group_expr().is_empty() + { + let mut child = Arc::clone(final_agg_exec.input()); + loop { + if let Some(partial_agg_exec) = + child.as_any().downcast_ref::() + { + if partial_agg_exec.mode().is_first_stage() + && partial_agg_exec.group_expr().is_empty() + && partial_agg_exec.filter_expr().iter().all(|e| e.is_none()) + { + return Some(child); + } + } + if let [childrens_child] = child.children().as_slice() { + child = Arc::clone(childrens_child); + } else { + break; + } + } + } + } + None +} + +/// If this agg_expr is a count that can be exactly derived from the statistics, return it. +fn take_optimizable_column_and_table_count( + agg_expr: &dyn AggregateExpr, + stats: &Statistics, +) -> Option<(ScalarValue, String)> { + let col_stats = &stats.column_statistics; + if is_non_distinct_count(agg_expr) { + if let Precision::Exact(num_rows) = stats.num_rows { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() + { + let current_val = &col_stats[col_expr.index()].null_count; + if let &Precision::Exact(val) = current_val { + return Some(( + ScalarValue::Int64(Some((num_rows - val) as i64)), + agg_expr.name().to_string(), + )); + } + } else if let Some(lit_expr) = + exprs[0].as_any().downcast_ref::() + { + if lit_expr.value() == &COUNT_STAR_EXPANSION { + return Some(( + ScalarValue::Int64(Some(num_rows as i64)), + agg_expr.name().to_string(), + )); + } + } + } + } + } + None +} + +/// If this agg_expr is a min that is exactly defined in the statistics, return it. +fn take_optimizable_min( + agg_expr: &dyn AggregateExpr, + stats: &Statistics, +) -> Option<(ScalarValue, String)> { + if let Precision::Exact(num_rows) = &stats.num_rows { + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if is_min(agg_expr) { + if let Ok(min_data_type) = + ScalarValue::try_from(agg_expr.field().unwrap().data_type()) + { + return Some((min_data_type, agg_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if is_min(agg_expr) { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() + { + if let Precision::Exact(val) = + &col_stats[col_expr.index()].min_value + { + if !val.is_null() { + return Some(( + val.clone(), + agg_expr.name().to_string(), + )); + } + } + } + } + } + } + _ => {} + } + } + None +} + +/// If this agg_expr is a max that is exactly defined in the statistics, return it. 
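// Mirror image of `take_optimizable_min` above. Illustrative only (the values
// and the "max(a)" display name are assumed, not taken from this patch): with
//
//   stats.num_rows               == Precision::Exact(3)
//   col_stats[idx].max_value     == Precision::Exact(ScalarValue::Int32(Some(42)))
//
// the aggregate can be answered without scanning any rows, returning
// `Some((ScalarValue::Int32(Some(42)), "max(a)".to_string()))`; with an exact
// row count of 0 it instead returns a typed NULL, matching SQL semantics.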
+fn take_optimizable_max( + agg_expr: &dyn AggregateExpr, + stats: &Statistics, +) -> Option<(ScalarValue, String)> { + if let Precision::Exact(num_rows) = &stats.num_rows { + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if is_max(agg_expr) { + if let Ok(max_data_type) = + ScalarValue::try_from(agg_expr.field().unwrap().data_type()) + { + return Some((max_data_type, agg_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if is_max(agg_expr) { + let exprs = agg_expr.expressions(); + if exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = + exprs[0].as_any().downcast_ref::() + { + if let Precision::Exact(val) = + &col_stats[col_expr.index()].max_value + { + if !val.is_null() { + return Some(( + val.clone(), + agg_expr.name().to_string(), + )); + } + } + } + } + } + } + _ => {} + } + } + None +} + +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { + return true; + } + } + false +} + +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_min(agg_expr: &dyn AggregateExpr) -> bool { + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name().to_lowercase() == "min" { + return true; + } + } + false +} + +// TODO: Move this check into AggregateUDFImpl +// https://github.com/apache/datafusion/issues/11153 +fn is_max(agg_expr: &dyn AggregateExpr) -> bool { + if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { + if agg_expr.fun().name().to_lowercase() == "max" { + return true; + } + } + false +} diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index 6b9df7cad5c8b..8108493a0d3b6 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -17,6 +17,7 @@ // Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] +pub mod aggregate_statistics; mod optimizer; pub mod output_requirements; From eb2b5fe74c7aef81cf55a7d6219cd15cc80e946b Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Tue, 6 Aug 2024 04:46:51 +0800 Subject: [PATCH 225/357] feat: Add support for cardinality function on maps (#11801) * feat: Add support for cardinality function on maps * chore: Fix prettier * feat: Add specialized signature for MapArray in ArrayFunctionSignature --- datafusion/expr/src/signature.rs | 6 +++ .../expr/src/type_coercion/functions.rs | 10 +++++ .../functions-nested/src/cardinality.rs | 39 +++++++++++++++---- datafusion/sqllogictest/test_files/map.slt | 9 +++++ docs/source/user-guide/expressions.md | 2 +- 5 files changed, 57 insertions(+), 9 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index b1cec3bad774b..577c663142a1e 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -145,6 +145,9 @@ pub enum ArrayFunctionSignature { /// The function takes a single argument that must be a List/LargeList/FixedSizeList /// or something that can be coerced to one of those types. 
Array, + /// Specialized Signature for MapArray + /// The function takes a single argument that must be a MapArray + MapArray, } impl std::fmt::Display for ArrayFunctionSignature { @@ -165,6 +168,9 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::Array => { write!(f, "array") } + ArrayFunctionSignature::MapArray => { + write!(f, "map_array") + } } } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index ef52a01e0598f..66807c3f446cd 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -378,6 +378,16 @@ fn get_valid_types( array(¤t_types[0]) .map_or_else(|| vec![vec![]], |array_type| vec![vec![array_type]]) } + ArrayFunctionSignature::MapArray => { + if current_types.len() != 1 { + return Ok(vec![vec![]]); + } + + match ¤t_types[0] { + DataType::Map(_, _) => vec![vec![current_types[0].clone()]], + _ => vec![vec![]], + } + } }, TypeSignature::Any(number) => { if current_types.len() != *number { diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index f6755c3447684..ea07ac381affd 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -18,13 +18,18 @@ //! [`ScalarUDFImpl`] definitions for cardinality function. use crate::utils::make_scalar_function; -use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait, UInt64Array}; +use arrow_array::{ + Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array, +}; use arrow_schema::DataType; -use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64}; -use datafusion_common::cast::{as_large_list_array, as_list_array}; +use arrow_schema::DataType::{FixedSizeList, LargeList, List, Map, UInt64}; +use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array}; use datafusion_common::Result; use datafusion_common::{exec_err, plan_err}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; use std::any::Any; use std::sync::Arc; @@ -32,14 +37,20 @@ make_udf_expr_and_func!( Cardinality, cardinality, array, - "returns the total number of elements in the array.", + "returns the total number of elements in the array or map.", cardinality_udf ); impl Cardinality { pub fn new() -> Self { Self { - signature: Signature::array(Volatility::Immutable), + signature: Signature::one_of( + vec![ + TypeSignature::ArraySignature(ArrayFunctionSignature::Array), + TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray), + ], + Volatility::Immutable, + ), aliases: vec![], } } @@ -64,9 +75,9 @@ impl ScalarUDFImpl for Cardinality { fn return_type(&self, arg_types: &[DataType]) -> Result { Ok(match arg_types[0] { - List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + List(_) | LargeList(_) | FixedSizeList(_, _) | Map(_, _) => UInt64, _ => { - return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList."); + return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList/Map."); } }) } @@ -95,12 +106,24 @@ pub fn cardinality_inner(args: &[ArrayRef]) -> Result { let list_array = as_large_list_array(&args[0])?; generic_list_cardinality::(list_array) } + Map(_, _) => { + let map_array = as_map_array(&args[0])?; + generic_map_cardinality(map_array) + } other => { 
exec_err!("cardinality does not support type '{:?}'", other) } } } +fn generic_map_cardinality(array: &MapArray) -> Result { + let result: UInt64Array = array + .iter() + .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64)) + .collect(); + Ok(Arc::new(result)) +} + fn generic_list_cardinality( array: &GenericListArray, ) -> Result { diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 11998eea90440..eb350c22bb5d9 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -459,3 +459,12 @@ SELECT MAP { 'a': 1, 2: 3 }; # SELECT MAKE_MAP(1, null, 2, 33, 3, null)[2]; # ---- # 33 + +## cardinality + +# cardinality scalar function +query IIII +select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b': null}), cardinality(MAP([],[])), + cardinality(MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }); +---- +3 2 0 2 diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 60036e440ffb7..ad5a9cb75152c 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -243,7 +243,7 @@ select log(-1), log(0), sqrt(-1); | array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2]` | | array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 0, 0]` | | array_sort(array, desc, null_first) | Returns sorted array. `array_sort([3, 1, 2, 5, 4]) -> [1, 2, 3, 4, 5]` | -| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| cardinality(array/map) | Returns the total number of elements in the array or map. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | | make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | | range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | | string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. 
`string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | From a1645c4c1d6578d425e6a86648e32f13bca5cd43 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 20:29:56 -0400 Subject: [PATCH 226/357] Minor: refactor probe check into function `should_skip_aggregation` (#11821) --- .../physical-plan/src/aggregates/row_hash.rs | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 62ed79dad4aa8..1b84befb02694 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -635,11 +635,7 @@ impl Stream for GroupedHashAggregateStream { ( if self.input_done { ExecutionState::Done - } else if self - .skip_aggregation_probe - .as_ref() - .is_some_and(|probe| probe.should_skip()) - { + } else if self.should_skip_aggregation() { ExecutionState::SkippingAggregation } else { ExecutionState::ReadingInput @@ -955,12 +951,13 @@ impl GroupedHashAggregateStream { Ok(()) } - // Updates skip aggregation probe state. - // In case stream has any spills, the probe is forcefully set to - // forbid aggregation skipping, and locked, since spilling resets - // total number of unique groups. - // - // Note: currently spilling is not supported for Partial aggregation + /// Updates skip aggregation probe state. + /// + /// In case stream has any spills, the probe is forcefully set to + /// forbid aggregation skipping, and locked, since spilling resets + /// total number of unique groups. + /// + /// Note: currently spilling is not supported for Partial aggregation fn update_skip_aggregation_probe(&mut self, input_rows: usize) { if let Some(probe) = self.skip_aggregation_probe.as_mut() { if !self.spill_state.spills.is_empty() { @@ -971,8 +968,8 @@ impl GroupedHashAggregateStream { }; } - // In case the probe indicates that aggregation may be - // skipped, forces stream to produce currently accumulated output. + /// In case the probe indicates that aggregation may be + /// skipped, forces stream to produce currently accumulated output. fn switch_to_skip_aggregation(&mut self) -> Result<()> { if let Some(probe) = self.skip_aggregation_probe.as_mut() { if probe.should_skip() { @@ -984,7 +981,15 @@ impl GroupedHashAggregateStream { Ok(()) } - // Transforms input batch to intermediate aggregate state, without grouping it + /// Returns true if the aggregation probe indicates that aggregation + /// should be skipped. 
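    // How the stream uses the probe (per the hunks above): when polling, the next
    // `ExecutionState` is chosen roughly as
    //
    //   if input_done                       => Done
    //   else if should_skip_aggregation()   => SkippingAggregation
    //   else                                => ReadingInput
    //
    // while `update_skip_aggregation_probe` feeds it input-row counts and disables
    // it permanently once any spill occurs, since spilling resets the count of
    // unique groups.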
+ fn should_skip_aggregation(&self) -> bool { + self.skip_aggregation_probe + .as_ref() + .is_some_and(|probe| probe.should_skip()) + } + + /// Transforms input batch to intermediate aggregate state, without grouping it fn transform_to_states(&self, batch: RecordBatch) -> Result { let group_values = evaluate_group_by(&self.group_by, &batch)?; let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; From 4a47dcbed03fdd4982f0e7a366a2db3af64c6c0c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 23:39:10 -0400 Subject: [PATCH 227/357] Minor: move path_partition into `core_integration` (#11831) --- datafusion/core/tests/sql/mod.rs | 1 + datafusion/core/tests/{ => sql}/path_partition.rs | 0 2 files changed, 1 insertion(+) rename datafusion/core/tests/{ => sql}/path_partition.rs (100%) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 995ce35c5bc26..dc9d047860213 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -60,6 +60,7 @@ pub mod aggregates; pub mod create_drop; pub mod explain_analyze; pub mod joins; +mod path_partition; pub mod select; mod sql_api; diff --git a/datafusion/core/tests/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs similarity index 100% rename from datafusion/core/tests/path_partition.rs rename to datafusion/core/tests/sql/path_partition.rs From 537774ec1d96980f492ea46981dba5467c6d720a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 23:53:19 -0400 Subject: [PATCH 228/357] Move optimizer integration tests to `core_integration` (#11830) --- datafusion/core/tests/core_integration.rs | 6 ++++++ .../mod.rs} | 0 .../aggregate_statistics.rs} | 0 .../core/tests/physical_optimizer/mod.rs | 18 ++++++++++++++++++ .../src/aggregate_statistics.rs | 2 ++ 5 files changed, 26 insertions(+) rename datafusion/core/tests/{optimizer_integration.rs => optimizer/mod.rs} (100%) rename datafusion/core/tests/{physical_optimizer_integration.rs => physical_optimizer/aggregate_statistics.rs} (100%) create mode 100644 datafusion/core/tests/physical_optimizer/mod.rs diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index deb5280388f12..79e5056e3cf5b 100644 --- a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -36,6 +36,12 @@ mod memory_limit; /// Run all tests that are found in the `custom_sources_cases` directory mod custom_sources_cases; +/// Run all tests that are found in the `optimizer` directory +mod optimizer; + +/// Run all tests that are found in the `physical_optimizer` directory +mod physical_optimizer; + #[cfg(test)] #[ctor::ctor] fn init() { diff --git a/datafusion/core/tests/optimizer_integration.rs b/datafusion/core/tests/optimizer/mod.rs similarity index 100% rename from datafusion/core/tests/optimizer_integration.rs rename to datafusion/core/tests/optimizer/mod.rs diff --git a/datafusion/core/tests/physical_optimizer_integration.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs similarity index 100% rename from datafusion/core/tests/physical_optimizer_integration.rs rename to datafusion/core/tests/physical_optimizer/aggregate_statistics.rs diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs new file mode 100644 index 0000000000000..0ee89a3d213c4 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) 
under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod aggregate_statistics; diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 0ce92df393aac..66b250c5063b5 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -296,3 +296,5 @@ fn is_max(agg_expr: &dyn AggregateExpr) -> bool { } false } + +// See tests in datafusion/core/tests/physical_optimizer From bf51babc9c7dc3e95476f84169646b4452fb9799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Tue, 6 Aug 2024 13:05:55 +0800 Subject: [PATCH 229/357] Optionally create name of aggregate expression from expressions (#11776) * Optionally create name of aggregate expression from expressions * Fix test * name -> alias * keep trying to create aggr expr's display name first --- .../combine_partial_final_agg.rs | 4 +- datafusion/core/src/physical_planner.rs | 272 +----------------- datafusion/core/src/test_util/mod.rs | 2 +- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 2 +- datafusion/expr/src/expr.rs | 262 ++++++++++++++++- .../physical-expr-common/src/aggregate/mod.rs | 40 ++- .../physical-plan/src/aggregates/mod.rs | 52 ++-- datafusion/physical-plan/src/windows/mod.rs | 2 +- datafusion/proto/src/physical_plan/mod.rs | 2 +- .../tests/cases/roundtrip_physical_plan.rs | 20 +- 10 files changed, 337 insertions(+), 321 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 6f3274820c8c1..843efcc7b0d28 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -280,7 +280,7 @@ mod tests { ) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![expr]) .schema(Arc::new(schema.clone())) - .name(name) + .alias(name) .build() .unwrap() } @@ -364,7 +364,7 @@ mod tests { vec![ AggregateExprBuilder::new(sum_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) - .name("Sum(b)") + .alias("Sum(b)") .build() .unwrap(), ]; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 378a892111c59..65cdbf9fe62ce 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -19,7 +19,6 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::fmt::Write; use std::sync::Arc; use crate::datasource::file_format::file_type_to_format; @@ -74,11 +73,9 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, AggregateFunction, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, Like, - TryCast, WindowFunction, + self, physical_name, 
AggregateFunction, Alias, GroupingSet, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; -use datafusion_expr::expr_vec_fmt; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; use datafusion_expr::{ DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, StringifiedPlan, @@ -97,265 +94,6 @@ use log::{debug, trace}; use sqlparser::ast::NullTreatment; use tokio::sync::Mutex; -fn create_function_physical_name( - fun: &str, - distinct: bool, - args: &[Expr], - order_by: Option<&Vec>, -) -> Result { - let names: Vec = args - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>()?; - - let distinct_str = match distinct { - true => "DISTINCT ", - false => "", - }; - - let phys_name = format!("{}({}{})", fun, distinct_str, names.join(",")); - - Ok(order_by - .map(|order_by| format!("{} ORDER BY [{}]", phys_name, expr_vec_fmt!(order_by))) - .unwrap_or(phys_name)) -} - -fn physical_name(e: &Expr) -> Result { - create_physical_name(e, true) -} - -fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { - match e { - Expr::Unnest(_) => { - internal_err!( - "Expr::Unnest should have been converted to LogicalPlan::Unnest" - ) - } - Expr::Column(c) => { - if is_first_expr { - Ok(c.name.clone()) - } else { - Ok(c.flat_name()) - } - } - Expr::Alias(Alias { name, .. }) => Ok(name.clone()), - Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), - Expr::Literal(value) => Ok(format!("{value:?}")), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let left = create_physical_name(left, false)?; - let right = create_physical_name(right, false)?; - Ok(format!("{left} {op} {right}")) - } - Expr::Case(case) => { - let mut name = "CASE ".to_string(); - if let Some(e) = &case.expr { - let _ = write!(name, "{} ", create_physical_name(e, false)?); - } - for (w, t) in &case.when_then_expr { - let _ = write!( - name, - "WHEN {} THEN {} ", - create_physical_name(w, false)?, - create_physical_name(t, false)? - ); - } - if let Some(e) = &case.else_expr { - let _ = write!(name, "ELSE {} ", create_physical_name(e, false)?); - } - name += "END"; - Ok(name) - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::TryCast(TryCast { expr, .. 
}) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::Not(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("NOT {expr}")) - } - Expr::Negative(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("(- {expr})")) - } - Expr::IsNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NULL")) - } - Expr::IsNotNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT NULL")) - } - Expr::IsTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS TRUE")) - } - Expr::IsFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS FALSE")) - } - Expr::IsUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS UNKNOWN")) - } - Expr::IsNotTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT TRUE")) - } - Expr::IsNotFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT FALSE")) - } - Expr::IsNotUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT UNKNOWN")) - } - Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args), - Expr::WindowFunction(WindowFunction { - fun, - args, - order_by, - .. - }) => { - create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter: _, - order_by, - null_treatment: _, - }) => { - create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => Ok(format!( - "ROLLUP ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::Cube(exprs) => Ok(format!( - "CUBE ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::GroupingSets(lists_of_exprs) => { - let mut strings = vec![]; - for exprs in lists_of_exprs { - let exprs_str = exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", "); - strings.push(format!("({exprs_str})")); - } - Ok(format!("GROUPING SETS ({})", strings.join(", "))) - } - }, - - Expr::InList(InList { - expr, - list, - negated, - }) => { - let expr = create_physical_name(expr, false)?; - let list = list.iter().map(|expr| create_physical_name(expr, false)); - if *negated { - Ok(format!("{expr} NOT IN ({list:?})")) - } else { - Ok(format!("{expr} IN ({list:?})")) - } - } - Expr::Exists { .. 
} => { - not_impl_err!("EXISTS is not yet supported in the physical plan") - } - Expr::InSubquery(_) => { - not_impl_err!("IN subquery is not yet supported in the physical plan") - } - Expr::ScalarSubquery(_) => { - not_impl_err!("Scalar subqueries are not yet supported in the physical plan") - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - let expr = create_physical_name(expr, false)?; - let low = create_physical_name(low, false)?; - let high = create_physical_name(high, false)?; - if *negated { - Ok(format!("{expr} NOT BETWEEN {low} AND {high}")) - } else { - Ok(format!("{expr} BETWEEN {low} AND {high}")) - } - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT {op_name} {pattern}{escape}")) - } else { - Ok(format!("{expr} {op_name} {pattern}{escape}")) - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT SIMILAR TO {pattern}{escape}")) - } else { - Ok(format!("{expr} SIMILAR TO {pattern}{escape}")) - } - } - Expr::Sort { .. } => { - internal_err!("Create physical name does not support sort expression") - } - Expr::Wildcard { .. } => { - internal_err!("Create physical name does not support wildcard") - } - Expr::Placeholder(_) => { - internal_err!("Create physical name does not support placeholder") - } - Expr::OuterReferenceColumn(_, _) => { - internal_err!("Create physical name does not support OuterReferenceColumn") - } - } -} - /// Physical query planner that converts a `LogicalPlan` to an /// `ExecutionPlan` suitable for execution. #[async_trait] @@ -1807,7 +1545,7 @@ type AggregateExprWithOptionalArgs = ( /// Create an aggregate expression with a name from a logical expression pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, - name: impl Into, + name: Option, logical_input_schema: &DFSchema, _physical_input_schema: &Schema, execution_props: &ExecutionProps, @@ -1881,9 +1619,9 @@ pub fn create_aggregate_expr_and_maybe_filter( ) -> Result { // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" let (name, e) = match e { - Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()), - Expr::AggregateFunction(_) => (e.display_name().unwrap_or(physical_name(e)?), e), - _ => (physical_name(e)?, e), + Expr::Alias(Alias { expr, name, .. 
}) => (Some(name.clone()), expr.as_ref()), + Expr::AggregateFunction(_) => (e.display_name().ok(), e), + _ => (None, e), }; create_aggregate_expr_with_name_and_maybe_filter( diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 6eb82dece31ce..9610a7f203648 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -432,7 +432,7 @@ impl TestAggregate { pub fn count_expr(&self, schema: &Schema) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![self.column()]) .schema(Arc::new(schema.clone())) - .name(self.column_name()) + .alias(self.column_name()) .build() .unwrap() } diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 6f286c9aeba1e..4cecb0b69335c 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -107,7 +107,7 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str vec![ AggregateExprBuilder::new(sum_udaf(), vec![col("d", &schema).unwrap()]) .schema(Arc::clone(&schema)) - .name("sum1") + .alias("sum1") .build() .unwrap(), ]; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 1a51c181f49f3..edf45a244e1f2 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -38,7 +38,8 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - internal_err, plan_err, Column, DFSchema, Result, ScalarValue, TableReference, + internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, + TableReference, }; use sqlparser::ast::NullTreatment; @@ -2277,6 +2278,265 @@ fn write_names_join(w: &mut W, exprs: &[Expr], sep: &str) -> Result<() Ok(()) } +pub fn create_function_physical_name( + fun: &str, + distinct: bool, + args: &[Expr], + order_by: Option<&Vec>, +) -> Result { + let names: Vec = args + .iter() + .map(|e| create_physical_name(e, false)) + .collect::>()?; + + let distinct_str = match distinct { + true => "DISTINCT ", + false => "", + }; + + let phys_name = format!("{}({}{})", fun, distinct_str, names.join(",")); + + Ok(order_by + .map(|order_by| format!("{} ORDER BY [{}]", phys_name, expr_vec_fmt!(order_by))) + .unwrap_or(phys_name)) +} + +pub fn physical_name(e: &Expr) -> Result { + create_physical_name(e, true) +} + +fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { + match e { + Expr::Unnest(_) => { + internal_err!( + "Expr::Unnest should have been converted to LogicalPlan::Unnest" + ) + } + Expr::Column(c) => { + if is_first_expr { + Ok(c.name.clone()) + } else { + Ok(c.flat_name()) + } + } + Expr::Alias(Alias { name, .. }) => Ok(name.clone()), + Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), + Expr::Literal(value) => Ok(format!("{value:?}")), + Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + let left = create_physical_name(left, false)?; + let right = create_physical_name(right, false)?; + Ok(format!("{left} {op} {right}")) + } + Expr::Case(case) => { + let mut name = "CASE ".to_string(); + if let Some(e) = &case.expr { + let _ = write!(name, "{} ", create_physical_name(e, false)?); + } + for (w, t) in &case.when_then_expr { + let _ = write!( + name, + "WHEN {} THEN {} ", + create_physical_name(w, false)?, + create_physical_name(t, false)? 
+ ); + } + if let Some(e) = &case.else_expr { + let _ = write!(name, "ELSE {} ", create_physical_name(e, false)?); + } + name += "END"; + Ok(name) + } + Expr::Cast(Cast { expr, .. }) => { + // CAST does not change the expression name + create_physical_name(expr, false) + } + Expr::TryCast(TryCast { expr, .. }) => { + // CAST does not change the expression name + create_physical_name(expr, false) + } + Expr::Not(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("NOT {expr}")) + } + Expr::Negative(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("(- {expr})")) + } + Expr::IsNull(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS NULL")) + } + Expr::IsNotNull(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS NOT NULL")) + } + Expr::IsTrue(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS TRUE")) + } + Expr::IsFalse(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS FALSE")) + } + Expr::IsUnknown(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS UNKNOWN")) + } + Expr::IsNotTrue(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS NOT TRUE")) + } + Expr::IsNotFalse(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS NOT FALSE")) + } + Expr::IsNotUnknown(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{expr} IS NOT UNKNOWN")) + } + Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args), + Expr::WindowFunction(WindowFunction { + fun, + args, + order_by, + .. + }) => { + create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) + } + Expr::AggregateFunction(AggregateFunction { + func, + distinct, + args, + filter: _, + order_by, + null_treatment: _, + }) => { + create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) + } + Expr::GroupingSet(grouping_set) => match grouping_set { + GroupingSet::Rollup(exprs) => Ok(format!( + "ROLLUP ({})", + exprs + .iter() + .map(|e| create_physical_name(e, false)) + .collect::>>()? + .join(", ") + )), + GroupingSet::Cube(exprs) => Ok(format!( + "CUBE ({})", + exprs + .iter() + .map(|e| create_physical_name(e, false)) + .collect::>>()? + .join(", ") + )), + GroupingSet::GroupingSets(lists_of_exprs) => { + let mut strings = vec![]; + for exprs in lists_of_exprs { + let exprs_str = exprs + .iter() + .map(|e| create_physical_name(e, false)) + .collect::>>()? + .join(", "); + strings.push(format!("({exprs_str})")); + } + Ok(format!("GROUPING SETS ({})", strings.join(", "))) + } + }, + + Expr::InList(InList { + expr, + list, + negated, + }) => { + let expr = create_physical_name(expr, false)?; + let list = list.iter().map(|expr| create_physical_name(expr, false)); + if *negated { + Ok(format!("{expr} NOT IN ({list:?})")) + } else { + Ok(format!("{expr} IN ({list:?})")) + } + } + Expr::Exists { .. 
} => { + not_impl_err!("EXISTS is not yet supported in the physical plan") + } + Expr::InSubquery(_) => { + not_impl_err!("IN subquery is not yet supported in the physical plan") + } + Expr::ScalarSubquery(_) => { + not_impl_err!("Scalar subqueries are not yet supported in the physical plan") + } + Expr::Between(Between { + expr, + negated, + low, + high, + }) => { + let expr = create_physical_name(expr, false)?; + let low = create_physical_name(low, false)?; + let high = create_physical_name(high, false)?; + if *negated { + Ok(format!("{expr} NOT BETWEEN {low} AND {high}")) + } else { + Ok(format!("{expr} BETWEEN {low} AND {high}")) + } + } + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; + let escape = if let Some(char) = escape_char { + format!("CHAR '{char}'") + } else { + "".to_string() + }; + if *negated { + Ok(format!("{expr} NOT {op_name} {pattern}{escape}")) + } else { + Ok(format!("{expr} {op_name} {pattern}{escape}")) + } + } + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive: _, + }) => { + let expr = create_physical_name(expr, false)?; + let pattern = create_physical_name(pattern, false)?; + let escape = if let Some(char) = escape_char { + format!("CHAR '{char}'") + } else { + "".to_string() + }; + if *negated { + Ok(format!("{expr} NOT SIMILAR TO {pattern}{escape}")) + } else { + Ok(format!("{expr} SIMILAR TO {pattern}{escape}")) + } + } + Expr::Sort { .. } => { + internal_err!("Create physical name does not support sort expression") + } + Expr::Wildcard { .. } => { + internal_err!("Create physical name does not support wildcard") + } + Expr::Placeholder(_) => { + internal_err!("Create physical name does not support placeholder") + } + Expr::OuterReferenceColumn(_, _) => { + internal_err!("Create physical name does not support OuterReferenceColumn") + } + } +} + #[cfg(test)] mod test { use crate::expr_fn::col; diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs index 665cdd708329f..350023352b12f 100644 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ b/datafusion/physical-expr-common/src/aggregate/mod.rs @@ -22,6 +22,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::exec_err; use datafusion_common::{internal_err, not_impl_err, DFSchema, Result}; +use datafusion_expr::expr::create_function_physical_name; use datafusion_expr::function::StateFieldsArgs; use datafusion_expr::type_coercion::aggregates::check_arg_count; use datafusion_expr::utils::AggregateOrderSensitivity; @@ -67,7 +68,7 @@ pub fn create_aggregate_expr( sort_exprs: &[Expr], ordering_req: &[PhysicalSortExpr], schema: &Schema, - name: impl Into, + name: Option, ignore_nulls: bool, is_distinct: bool, ) -> Result> { @@ -77,7 +78,9 @@ pub fn create_aggregate_expr( builder = builder.order_by(ordering_req.to_vec()); builder = builder.logical_exprs(input_exprs.to_vec()); builder = builder.schema(Arc::new(schema.clone())); - builder = builder.name(name); + if let Some(name) = name { + builder = builder.alias(name); + } if ignore_nulls { builder = builder.ignore_nulls(); @@ -98,7 +101,7 @@ pub fn create_aggregate_expr_with_dfschema( sort_exprs: &[Expr], ordering_req: &[PhysicalSortExpr], dfschema: &DFSchema, - name: impl Into, + alias: Option, 
ignore_nulls: bool, is_distinct: bool, is_reversed: bool, @@ -111,7 +114,9 @@ pub fn create_aggregate_expr_with_dfschema( builder = builder.dfschema(dfschema.clone()); let schema: Schema = dfschema.into(); builder = builder.schema(Arc::new(schema)); - builder = builder.name(name); + if let Some(alias) = alias { + builder = builder.alias(alias); + } if ignore_nulls { builder = builder.ignore_nulls(); @@ -137,7 +142,7 @@ pub struct AggregateExprBuilder { args: Vec>, /// Logical expressions of the aggregate function, it will be deprecated in logical_args: Vec, - name: String, + alias: Option, /// Arrow Schema for the aggregate function schema: SchemaRef, /// Datafusion Schema for the aggregate function @@ -160,7 +165,7 @@ impl AggregateExprBuilder { fun, args, logical_args: vec![], - name: String::new(), + alias: None, schema: Arc::new(Schema::empty()), dfschema: DFSchema::empty(), sort_exprs: vec![], @@ -176,7 +181,7 @@ impl AggregateExprBuilder { fun, args, logical_args, - name, + alias, schema, dfschema, sort_exprs, @@ -213,6 +218,19 @@ impl AggregateExprBuilder { )?; let data_type = fun.return_type(&input_exprs_types)?; + let name = match alias { + None => create_function_physical_name( + fun.name(), + is_distinct, + &logical_args, + if sort_exprs.is_empty() { + None + } else { + Some(&sort_exprs) + }, + )?, + Some(alias) => alias, + }; Ok(Arc::new(AggregateFunctionExpr { fun: Arc::unwrap_or_clone(fun), @@ -232,8 +250,8 @@ impl AggregateExprBuilder { })) } - pub fn name(mut self, name: impl Into) -> Self { - self.name = name.into(); + pub fn alias(mut self, alias: impl Into) -> Self { + self.alias = Some(alias.into()); self } @@ -680,7 +698,7 @@ impl AggregateExpr for AggregateFunctionExpr { &self.sort_exprs, &self.ordering_req, &self.dfschema, - self.name(), + Some(self.name().to_string()), self.ignore_nulls, self.is_distinct, self.is_reversed, @@ -721,7 +739,7 @@ impl AggregateExpr for AggregateFunctionExpr { &reverse_sort_exprs, &reverse_ordering_req, &self.dfschema, - name, + Some(name), self.ignore_nulls, self.is_distinct, !self.is_reversed, diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 8941418c12e1e..d72da9b300499 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1362,7 +1362,7 @@ mod tests { let aggregates = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1i8)]) .schema(Arc::clone(&input_schema)) - .name("COUNT(1)") + .alias("COUNT(1)") .logical_exprs(vec![datafusion_expr::lit(1i8)]) .build()?]; @@ -1507,7 +1507,7 @@ mod tests { vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) .schema(Arc::clone(&input_schema)) - .name("AVG(b)") + .alias("AVG(b)") .build()?, ]; @@ -1803,7 +1803,7 @@ mod tests { fn test_median_agg_expr(schema: SchemaRef) -> Result> { AggregateExprBuilder::new(median_udaf(), vec![col("a", &schema)?]) .schema(schema) - .name("MEDIAN(a)") + .alias("MEDIAN(a)") .build() } @@ -1834,7 +1834,7 @@ mod tests { vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) .schema(Arc::clone(&input_schema)) - .name("AVG(b)") + .alias("AVG(b)") .build()?, ]; @@ -1894,7 +1894,7 @@ mod tests { vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) .schema(Arc::clone(&schema)) - .name("AVG(a)") + .alias("AVG(a)") .build()?, ]; @@ -1934,7 +1934,7 @@ mod tests { vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) - .name("AVG(b)") + 
.alias("AVG(b)") .build()?, ]; @@ -2002,7 +2002,7 @@ mod tests { &sort_exprs, &ordering_req, dfschema, - "FIRST_VALUE(b)", + None, false, false, false, @@ -2034,7 +2034,7 @@ mod tests { &sort_exprs, &ordering_req, dfschema, - "LAST_VALUE(b)", + None, false, false, false, @@ -2130,24 +2130,24 @@ mod tests { let result = crate::collect(aggregate_final, task_ctx).await?; if is_first_acc { let expected = [ - "+---+----------------+", - "| a | FIRST_VALUE(b) |", - "+---+----------------+", - "| 2 | 0.0 |", - "| 3 | 1.0 |", - "| 4 | 3.0 |", - "+---+----------------+", + "+---+--------------------------------------------+", + "| a | first_value(b) ORDER BY [b ASC NULLS LAST] |", + "+---+--------------------------------------------+", + "| 2 | 0.0 |", + "| 3 | 1.0 |", + "| 4 | 3.0 |", + "+---+--------------------------------------------+", ]; assert_batches_eq!(expected, &result); } else { let expected = [ - "+---+---------------+", - "| a | LAST_VALUE(b) |", - "+---+---------------+", - "| 2 | 3.0 |", - "| 3 | 5.0 |", - "| 4 | 6.0 |", - "+---+---------------+", + "+---+-------------------------------------------+", + "| a | last_value(b) ORDER BY [b ASC NULLS LAST] |", + "+---+-------------------------------------------+", + "| 2 | 3.0 |", + "| 3 | 5.0 |", + "| 4 | 6.0 |", + "+---+-------------------------------------------+", ]; assert_batches_eq!(expected, &result); }; @@ -2267,7 +2267,7 @@ mod tests { &sort_exprs, &ordering_req, &test_df_schema, - "array_agg", + None, false, false, false, @@ -2363,7 +2363,7 @@ mod tests { let aggregates: Vec> = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) .schema(Arc::clone(&schema)) - .name("1") + .alias("1") .build()?]; let input_batches = (0..4) @@ -2427,7 +2427,7 @@ mod tests { &[], &[], &df_schema, - "COUNT(val)", + Some("COUNT(val)".to_string()), false, false, false, @@ -2515,7 +2515,7 @@ mod tests { &[], &[], &df_schema, - "COUNT(val)", + Some("COUNT(val)".to_string()), false, false, false, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 65cef28efc451..b41f3ad71bb8f 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -128,7 +128,7 @@ pub fn create_window_expr( let aggregate = AggregateExprBuilder::new(Arc::clone(fun), args.to_vec()) .schema(Arc::new(input_schema.clone())) - .name(name) + .alias(name) .order_by(order_by.to_vec()) .sort_exprs(sort_exprs) .with_ignore_nulls(ignore_nulls) diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index aefa1d87a2784..59db791c7595a 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -492,7 +492,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { // https://github.com/apache/datafusion/issues/11804 AggregateExprBuilder::new(agg_udf, input_phy_expr) .schema(Arc::clone(&physical_schema)) - .name(name) + .alias(name) .with_ignore_nulls(agg_node.ignore_nulls) .with_distinct(agg_node.distinct) .build() diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 0e2bc9cbb3e22..712182791b0bc 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -296,7 +296,7 @@ fn roundtrip_window() -> Result<()> { vec![cast(col("b", &schema)?, &schema, DataType::Float64)?], ) .schema(Arc::clone(&schema)) - .name("avg(b)") + .alias("avg(b)") 
.build()?, &[], &[], @@ -312,7 +312,7 @@ fn roundtrip_window() -> Result<()> { let args = vec![cast(col("a", &schema)?, &schema, DataType::Float64)?]; let sum_expr = AggregateExprBuilder::new(sum_udaf(), args) .schema(Arc::clone(&schema)) - .name("SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING") + .alias("SUM(a) RANGE BETWEEN CURRENT ROW AND UNBOUNDED PRECEEDING") .build()?; let sliding_aggr_window_expr = Arc::new(SlidingAggregateWindowExpr::new( @@ -346,17 +346,17 @@ fn rountrip_aggregate() -> Result<()> { let avg_expr = AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) - .name("AVG(b)") + .alias("AVG(b)") .build()?; let nth_expr = AggregateExprBuilder::new(nth_value_udaf(), vec![col("b", &schema)?, lit(1u64)]) .schema(Arc::clone(&schema)) - .name("NTH_VALUE(b, 1)") + .alias("NTH_VALUE(b, 1)") .build()?; let str_agg_expr = AggregateExprBuilder::new(string_agg_udaf(), vec![col("b", &schema)?, lit(1u64)]) .schema(Arc::clone(&schema)) - .name("NTH_VALUE(b, 1)") + .alias("NTH_VALUE(b, 1)") .build()?; let test_cases: Vec>> = vec![ @@ -396,7 +396,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) - .name("AVG(b)") + .alias("AVG(b)") .build()?, ]; @@ -463,7 +463,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { vec![ AggregateExprBuilder::new(Arc::new(udaf), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) - .name("example_agg") + .alias("example_agg") .build()?, ]; @@ -914,7 +914,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { vec![udf_expr.clone() as Arc], ) .schema(schema.clone()) - .name("max") + .alias("max") .build()?; let window = Arc::new(WindowAggExec::try_new( @@ -965,7 +965,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { let aggr_expr = AggregateExprBuilder::new(Arc::clone(&udaf), aggr_args.clone()) .schema(Arc::clone(&schema)) - .name("aggregate_udf") + .alias("aggregate_udf") .build()?; let filter = Arc::new(FilterExec::try_new( @@ -990,7 +990,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { let aggr_expr = AggregateExprBuilder::new(udaf, aggr_args.clone()) .schema(Arc::clone(&schema)) - .name("aggregate_udf") + .alias("aggregate_udf") .distinct() .ignore_nulls() .build()?; From d8bc7e2e6c50b26509b7fe7ef8915aa3010afc92 Mon Sep 17 00:00:00 2001 From: Kezhu Wang Date: Tue, 6 Aug 2024 13:38:31 +0800 Subject: [PATCH 230/357] Bump deprecated version of SessionState::new_with_config_rt to 41.0.0 (#11839) SessionState::new_with_config_rt was deprecated in favor of SessionStateBuilder in #11403 which is not shipped in 40.x. --- datafusion/core/src/execution/session_state.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index ccad0240fddbc..0a057d6f1417e 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -249,7 +249,7 @@ impl Session for SessionState { impl SessionState { /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. 
- #[deprecated(since = "40.0.0", note = "Use SessionStateBuilder")] + #[deprecated(since = "41.0.0", note = "Use SessionStateBuilder")] pub fn new_with_config_rt(config: SessionConfig, runtime: Arc) -> Self { SessionStateBuilder::new() .with_config(config) From 1c98e6e640090600d36ce51f87919fea7b6eeed2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 6 Aug 2024 03:22:52 -0400 Subject: [PATCH 231/357] Fix partial aggregation skipping with Decimal aggregators (#11833) * Fix partial aggregation skipping for Decimal * fix bug --- .../aggregate/groups_accumulator/prim_op.rs | 3 ++- .../test_files/aggregate_skip_partial.slt | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs index 8d69646bd422a..acf1ae525c797 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs @@ -184,7 +184,8 @@ where "initial_values underlying buffer must not be shared" ) })? - .map_err(DataFusionError::from)?; + .map_err(DataFusionError::from)? + .with_data_type(self.data_type.clone()); Ok(vec![Arc::new(state_values)]) } diff --git a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt index 65efc24ec037c..6c0cf5f800d89 100644 --- a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt +++ b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt @@ -322,3 +322,29 @@ FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; 3 109 211 2.80575042963 2.80632930994 4 -171 56 2.10740506649 1.939846396446 5 -86 -76 1.8741710186 1.600569307804 + + +statement ok +DROP TABLE aggregate_test_100_null; + +# Test for aggregate functions with different intermediate types +# Need more than 10 values to trigger skipping +statement ok +CREATE TABLE decimal_table(i int, d decimal(10,3)) as +VALUES (1, 1.1), (2, 2.2), (3, 3.3), (2, 4.4), (1, 5.5); + +statement ok +CREATE TABLE t(id int) as values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); + +query IR +SELECT i, sum(d) +FROM decimal_table CROSS JOIN t +GROUP BY i +ORDER BY i; +---- +1 66 +2 66 +3 33 + +statement ok +DROP TABLE decimal_table; From 3d76aa25e4830ef8da42fae17453d8d1b8e66d4e Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Tue, 6 Aug 2024 03:23:59 -0700 Subject: [PATCH 232/357] feat: support `Utf8View` type in `starts_with` function (#11787) * feat: support `Utf8View` for `starts_with` * style: clippy * simplify string view handling * fix: allow utf8 and largeutf8 to be cast into utf8view * fix: fix test * Apply suggestions from code review Co-authored-by: Yongting You <2010youy01@gmail.com> Co-authored-by: Andrew Lamb * style: fix format * feat: add addiontal tests * tests: improve tests * fix: fix null case * tests: one more null test * Test comments and execution tests --------- Co-authored-by: Yongting You <2010youy01@gmail.com> Co-authored-by: Andrew Lamb --- datafusion/expr/src/expr_schema.rs | 1 + .../expr/src/type_coercion/functions.rs | 16 ++++ .../functions/src/string/starts_with.rs | 92 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 70 +++++++++++++- 4 files changed, 158 insertions(+), 21 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 676903d59a075..9faeb8aed506c 100644 --- 
a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -148,6 +148,7 @@ impl ExprSchemable for Expr { .iter() .map(|e| e.get_type(schema)) .collect::>>()?; + // verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` data_types_with_scalar_udf(&arg_data_types, func).map_err(|err| { plan_datafusion_err!( diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 66807c3f446cd..4f2776516d3e4 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -583,6 +583,10 @@ fn coerced_from<'a>( (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => { Some(type_into.clone()) } + // We can go into a Utf8View from a Utf8 or LargeUtf8 + (Utf8View, _) if matches!(type_from, Utf8 | LargeUtf8 | Null) => { + Some(type_into.clone()) + } // Any type can be coerced into strings (Utf8 | LargeUtf8, _) => Some(type_into.clone()), (Null, _) if can_cast_types(type_from, type_into) => Some(type_into.clone()), @@ -646,6 +650,18 @@ mod tests { use super::*; use arrow::datatypes::Field; + #[test] + fn test_string_conversion() { + let cases = vec![ + (DataType::Utf8View, DataType::Utf8, true), + (DataType::Utf8View, DataType::LargeUtf8, true), + ]; + + for case in cases { + assert_eq!(can_coerce_from(&case.0, &case.1), case.2); + } + } + #[test] fn test_maybe_data_types() { // this vec contains: arg1, arg2, expected result diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index 05bd960ff14b6..8450697cbf303 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -18,10 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::array::ArrayRef; use arrow::datatypes::DataType; -use datafusion_common::{cast::as_generic_string_array, internal_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; @@ -30,12 +30,8 @@ use crate::utils::make_scalar_function; /// Returns true if string starts with prefix. /// starts_with('alphabet', 'alph') = 't' -pub fn starts_with(args: &[ArrayRef]) -> Result { - let left = as_generic_string_array::(&args[0])?; - let right = as_generic_string_array::(&args[1])?; - - let result = arrow::compute::kernels::comparison::starts_with(left, right)?; - +pub fn starts_with(args: &[ArrayRef]) -> Result { + let result = arrow::compute::kernels::comparison::starts_with(&args[0], &args[1])?; Ok(Arc::new(result) as ArrayRef) } @@ -52,14 +48,15 @@ impl Default for StartsWithFunc { impl StartsWithFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. 
+ Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), ], Volatility::Immutable, ), @@ -81,18 +78,73 @@ impl ScalarUDFImpl for StartsWithFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - use DataType::*; - - Ok(Boolean) + Ok(DataType::Boolean) } fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(starts_with::, vec![])(args), - DataType::LargeUtf8 => { - return make_scalar_function(starts_with::, vec![])(args); + DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { + make_scalar_function(starts_with, vec![])(args) } - _ => internal_err!("Unsupported data type"), + _ => internal_err!("Unsupported data types for starts_with. Expected Utf8, LargeUtf8 or Utf8View")?, } } } + +#[cfg(test)] +mod tests { + use crate::utils::test::test_function; + use arrow::array::{Array, BooleanArray}; + use arrow::datatypes::DataType::Boolean; + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use super::*; + + #[test] + fn test_functions() -> Result<()> { + // Generate test cases for starts_with + let test_cases = vec![ + (Some("alphabet"), Some("alph"), Some(true)), + (Some("alphabet"), Some("bet"), Some(false)), + ( + Some("somewhat large string"), + Some("somewhat large"), + Some(true), + ), + (Some("somewhat large string"), Some("large"), Some(false)), + ] + .into_iter() + .flat_map(|(a, b, c)| { + let utf_8_args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(a.map(|s| s.to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(b.map(|s| s.to_string()))), + ]; + + let large_utf_8_args = vec![ + ColumnarValue::Scalar(ScalarValue::LargeUtf8(a.map(|s| s.to_string()))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(b.map(|s| s.to_string()))), + ]; + + let utf_8_view_args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8View(a.map(|s| s.to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8View(b.map(|s| s.to_string()))), + ]; + + vec![(utf_8_args, c), (large_utf_8_args, c), (utf_8_view_args, c)] + }); + + for (args, expected) in test_cases { + test_function!( + StartsWithFunc::new(), + &args, + Ok(expected), + bool, + Boolean, + BooleanArray + ); + } + + Ok(()) + } +} diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 763b4e99c6145..584d3b3306904 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -355,6 +355,75 @@ logical_plan 01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8), count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]] 02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict] +### `STARTS_WITH` + +# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8 +# (should be no casts) +query TT +EXPLAIN SELECT + STARTS_WITH(column1_utf8view, column2_utf8view) as c1, + STARTS_WITH(column1_utf8view, column2_utf8) as c2, + STARTS_WITH(column1_utf8view, column2_large_utf8) as c3 +FROM test; +---- +logical_plan +01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS Utf8View)) AS c3 +02)--TableScan: test projection=[column2_utf8, column2_large_utf8, column1_utf8view, 
column2_utf8view] + +query BBB +SELECT + STARTS_WITH(column1_utf8view, column2_utf8view) as c1, + STARTS_WITH(column1_utf8view, column2_utf8) as c2, + STARTS_WITH(column1_utf8view, column2_large_utf8) as c3 +FROM test; +---- +false false false +true true true +true true true +NULL NULL NULL + +# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8 +# Should work, but will have to cast to common types +# should cast utf8 -> utf8view and largeutf8 -> utf8view +query TT +EXPLAIN SELECT + STARTS_WITH(column1_utf8, column2_utf8view) as c1, + STARTS_WITH(column1_utf8, column2_utf8) as c3, + STARTS_WITH(column1_utf8, column2_large_utf8) as c4 +FROM test; +---- +logical_plan +01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1, starts_with(test.column1_utf8, test.column2_utf8) AS c3, starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4 +02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1, test.column1_utf8, test.column2_utf8, test.column2_large_utf8, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8, column2_utf8, column2_large_utf8, column2_utf8view] + +query BBB + SELECT + STARTS_WITH(column1_utf8, column2_utf8view) as c1, + STARTS_WITH(column1_utf8, column2_utf8) as c3, + STARTS_WITH(column1_utf8, column2_large_utf8) as c4 +FROM test; +---- +false false false +true true true +true true true +NULL NULL NULL + + +# Test STARTS_WITH with utf8view against literals +# In this case, the literals should be cast to utf8view. The columns +# should not be cast to utf8. +query TT +EXPLAIN SELECT + STARTS_WITH(column1_utf8view, 'äöüß') as c1, + STARTS_WITH(column1_utf8view, '') as c2, + STARTS_WITH(column1_utf8view, NULL) as c3, + STARTS_WITH(NULL, column1_utf8view) as c4 +FROM test; +---- +logical_plan +01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 +02)--TableScan: test projection=[column1_utf8view] statement ok drop table test; @@ -376,6 +445,5 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; ---- 2024-01-23 - statement ok drop table dates; From 4e278cad19c350f93195e0953881fefafe442f55 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Tue, 6 Aug 2024 14:00:01 +0300 Subject: [PATCH 233/357] fix: hash join tests with forced collisions (#11806) * tests: hash join tests with hash collisions * replace div_ceil expression with function call * Apply suggestions from code review Co-authored-by: Andrew Lamb * remove redundant comments --------- Co-authored-by: Andrew Lamb --- .../physical-plan/src/joins/hash_join.rs | 83 ++++++++++++------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 063f35059fb8f..14835f717ea37 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -1583,7 +1583,6 @@ mod tests { use rstest::*; use rstest_reuse::*; - #[cfg(not(feature = "force_hash_collisions"))] fn div_ceil(a: usize, b: usize) -> usize { (a + b - 1) / b } @@ -1931,9 +1930,6 @@ mod tests { Ok(()) } - // FIXME(#TODO) test fails with feature `force_hash_collisions` - // https://github.com/apache/datafusion/issues/11658 - #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn 
join_inner_two(batch_size: usize) -> Result<()> { @@ -1964,12 +1960,20 @@ mod tests { assert_eq!(columns, vec!["a1", "b2", "c1", "a1", "b2", "c2"]); - // expected joined records = 3 - // in case batch_size is 1 - additional empty batch for remaining 3-2 row - let mut expected_batch_count = div_ceil(3, batch_size); - if batch_size == 1 { - expected_batch_count += 1; - } + let expected_batch_count = if cfg!(not(feature = "force_hash_collisions")) { + // Expected number of hash table matches = 3 + // in case batch_size is 1 - additional empty batch for remaining 3-2 row + let mut expected_batch_count = div_ceil(3, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + expected_batch_count + } else { + // With hash collisions enabled, all records will match each other + // and filtered later. + div_ceil(9, batch_size) + }; + assert_eq!(batches.len(), expected_batch_count); let expected = [ @@ -1989,9 +1993,6 @@ mod tests { } /// Test where the left has 2 parts, the right with 1 part => 1 part - // FIXME(#TODO) test fails with feature `force_hash_collisions` - // https://github.com/apache/datafusion/issues/11658 - #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn join_inner_one_two_parts_left(batch_size: usize) -> Result<()> { @@ -2029,12 +2030,20 @@ mod tests { assert_eq!(columns, vec!["a1", "b2", "c1", "a1", "b2", "c2"]); - // expected joined records = 3 - // in case batch_size is 1 - additional empty batch for remaining 3-2 row - let mut expected_batch_count = div_ceil(3, batch_size); - if batch_size == 1 { - expected_batch_count += 1; - } + let expected_batch_count = if cfg!(not(feature = "force_hash_collisions")) { + // Expected number of hash table matches = 3 + // in case batch_size is 1 - additional empty batch for remaining 3-2 row + let mut expected_batch_count = div_ceil(3, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + expected_batch_count + } else { + // With hash collisions enabled, all records will match each other + // and filtered later. + div_ceil(9, batch_size) + }; + assert_eq!(batches.len(), expected_batch_count); let expected = [ @@ -2104,9 +2113,6 @@ mod tests { } /// Test where the left has 1 part, the right has 2 parts => 2 parts - // FIXME(#TODO) test fails with feature `force_hash_collisions` - // https://github.com/apache/datafusion/issues/11658 - #[cfg(not(feature = "force_hash_collisions"))] #[apply(batch_sizes)] #[tokio::test] async fn join_inner_one_two_parts_right(batch_size: usize) -> Result<()> { @@ -2143,12 +2149,19 @@ mod tests { let stream = join.execute(0, Arc::clone(&task_ctx))?; let batches = common::collect(stream).await?; - // expected joined records = 1 (first right batch) - // and additional empty batch for non-joined 20-6-80 - let mut expected_batch_count = div_ceil(1, batch_size); - if batch_size == 1 { - expected_batch_count += 1; - } + let expected_batch_count = if cfg!(not(feature = "force_hash_collisions")) { + // Expected number of hash table matches for first right batch = 1 + // and additional empty batch for non-joined 20-6-80 + let mut expected_batch_count = div_ceil(1, batch_size); + if batch_size == 1 { + expected_batch_count += 1; + } + expected_batch_count + } else { + // With hash collisions enabled, all records will match each other + // and filtered later. 
+ div_ceil(6, batch_size) + }; assert_eq!(batches.len(), expected_batch_count); let expected = [ @@ -2166,8 +2179,14 @@ mod tests { let stream = join.execute(1, Arc::clone(&task_ctx))?; let batches = common::collect(stream).await?; - // expected joined records = 2 (second right batch) - let expected_batch_count = div_ceil(2, batch_size); + let expected_batch_count = if cfg!(not(feature = "force_hash_collisions")) { + // Expected number of hash table matches for second right batch = 2 + div_ceil(2, batch_size) + } else { + // With hash collisions enabled, all records will match each other + // and filtered later. + div_ceil(3, batch_size) + }; assert_eq!(batches.len(), expected_batch_count); let expected = [ @@ -3732,9 +3751,9 @@ mod tests { | JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { - (expected_resultset_records + batch_size - 1) / batch_size + div_ceil(expected_resultset_records, batch_size) } - _ => (expected_resultset_records + batch_size - 1) / batch_size + 1, + _ => div_ceil(expected_resultset_records, batch_size) + 1, }; assert_eq!( batches.len(), From e19e9825f2e127621782f5fdcd5f22e04ace8ee8 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Tue, 6 Aug 2024 20:19:12 +0800 Subject: [PATCH 234/357] don't create zero sized buffer (#11841) --- datafusion/physical-plan/src/coalesce_batches.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index b822ec2dafeb1..de42a55ad3502 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -492,8 +492,10 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { if actual_buffer_size > (ideal_buffer_size * 2) { // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. // See https://github.com/apache/arrow-rs/issues/6094 for more details. - let mut builder = StringViewBuilder::with_capacity(s.len()) - .with_block_size(ideal_buffer_size as u32); + let mut builder = StringViewBuilder::with_capacity(s.len()); + if ideal_buffer_size > 0 { + builder = builder.with_block_size(ideal_buffer_size as u32); + } for v in s.iter() { builder.append_option(v); @@ -802,7 +804,7 @@ mod tests { impl StringViewTest { /// Create a `StringViewArray` with the parameters specified in this struct fn build(self) -> StringViewArray { - let mut builder = StringViewBuilder::with_capacity(100); + let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); loop { for &v in self.strings.iter() { builder.append_option(v); From 16a3557325eb8f949f4a87ab90c0a0b174dc8d86 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Tue, 6 Aug 2024 15:42:00 +0200 Subject: [PATCH 235/357] fix: `collect_columns` quadratic complexity (#11843) Fix accidental $O(n^2)$ in `collect_columns`. There are the following ways to insert a clone into a hash set: - **clone before check:** Basically `set.insert(x.clone())`. That's rather expensive if you have a lot of duplicates. - **iter & clone:** That's what we do right now, but that's in $O(n^2)$. - **check & insert:** Basically `if !set.contains(x) {set.insert(x.clone())}` That requires two hash probes though. 
- **entry-based API:** Sadly the stdlib doesn't really offer any handle/entry-based APIs yet (see https://github.com/rust-lang/rust/issues/60896 ), but `hashbrown` does, so we can use the nice `set.get_or_insert_owned(x)` which will only clone the reference if it doesn't exists yet and only hashes once. We now use the last approach. --- datafusion/physical-expr/src/utils/mod.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 6c4791b158c81..4c37db4849a7f 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -17,9 +17,10 @@ mod guarantee; pub use guarantee::{Guarantee, LiteralGuarantee}; +use hashbrown::HashSet; use std::borrow::Borrow; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::Arc; use crate::expressions::{BinaryExpr, Column}; @@ -204,9 +205,7 @@ pub fn collect_columns(expr: &Arc) -> HashSet { let mut columns = HashSet::::new(); expr.apply(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { - if !columns.iter().any(|c| c.eq(column)) { - columns.insert(column.clone()); - } + columns.get_or_insert_owned(column); } Ok(TreeNodeRecursion::Continue) }) From bddb6415a50746d2803dd908d19c3758952d74f9 Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 7 Aug 2024 03:17:36 +0800 Subject: [PATCH 236/357] Reduce clone of `Statistics` in `ListingTable` and `PartitionedFile` (#11802) * reduce clone of `Statistics` by using arc. * optimize `get_statistics_with_limit` and `split_files`. * directly create the col stats set. * fix pb. * fix fmt. * fix clippy. * fix compile. * remove stale codes. * optimize `split_files` by using drain. * remove default for PartitionedFile. * don't keep `Arc` in `PartitionedFile`. * fix pb. --- .../core/src/datasource/listing/helpers.rs | 21 ++- datafusion/core/src/datasource/listing/mod.rs | 1 + .../core/src/datasource/listing/table.rs | 26 +-- datafusion/core/src/datasource/statistics.rs | 156 +++++++++--------- 4 files changed, 107 insertions(+), 97 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 29b593a70ca06..67af8ef12c8b5 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -18,6 +18,7 @@ //! Helper functions for the table implementation use std::collections::HashMap; +use std::mem; use std::sync::Arc; use super::PartitionedFile; @@ -138,10 +139,22 @@ pub fn split_files( // effectively this is div with rounding up instead of truncating let chunk_size = (partitioned_files.len() + n - 1) / n; - partitioned_files - .chunks(chunk_size) - .map(|c| c.to_vec()) - .collect() + let mut chunks = Vec::with_capacity(n); + let mut current_chunk = Vec::with_capacity(chunk_size); + for file in partitioned_files.drain(..) 
{ + current_chunk.push(file); + if current_chunk.len() == chunk_size { + let full_chunk = + mem::replace(&mut current_chunk, Vec::with_capacity(chunk_size)); + chunks.push(full_chunk); + } + } + + if !current_chunk.is_empty() { + chunks.push(current_chunk) + } + + chunks } struct Partition { diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index 44f92760908d3..21a60614cff2d 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -82,6 +82,7 @@ pub struct PartitionedFile { /// An optional field for user defined per object metadata pub extensions: Option>, } + impl PartitionedFile { /// Create a simple file without metadata or partition pub fn new(path: impl Into, size: u64) -> Self { diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 80f49e4eb8e62..bb86ac3ae4161 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -973,15 +973,16 @@ impl ListingTable { // collect the statistics if required by the config let files = file_list .map(|part_file| async { - let mut part_file = part_file?; + let part_file = part_file?; if self.options.collect_stat { let statistics = self.do_collect_statistics(ctx, &store, &part_file).await?; - part_file.statistics = Some(statistics.clone()); - Ok((part_file, statistics)) as Result<(PartitionedFile, Statistics)> + Ok((part_file, statistics)) } else { - Ok((part_file, Statistics::new_unknown(&self.file_schema))) - as Result<(PartitionedFile, Statistics)> + Ok(( + part_file, + Arc::new(Statistics::new_unknown(&self.file_schema)), + )) } }) .boxed() @@ -1011,12 +1012,12 @@ impl ListingTable { ctx: &SessionState, store: &Arc, part_file: &PartitionedFile, - ) -> Result { - let statistics_cache = self.collected_statistics.clone(); - return match statistics_cache + ) -> Result> { + match self + .collected_statistics .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) { - Some(statistics) => Ok(statistics.as_ref().clone()), + Some(statistics) => Ok(statistics.clone()), None => { let statistics = self .options @@ -1028,14 +1029,15 @@ impl ListingTable { &part_file.object_meta, ) .await?; - statistics_cache.put_with_extra( + let statistics = Arc::new(statistics); + self.collected_statistics.put_with_extra( &part_file.object_meta.location, - statistics.clone().into(), + statistics.clone(), &part_file.object_meta, ); Ok(statistics) } - }; + } } } diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 8c789e461b08e..9d031a6bbc858 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +use std::mem; +use std::sync::Arc; + use super::listing::PartitionedFile; use crate::arrow::datatypes::{Schema, SchemaRef}; use crate::error::Result; @@ -26,8 +29,6 @@ use datafusion_common::stats::Precision; use datafusion_common::ScalarValue; use futures::{Stream, StreamExt}; -use itertools::izip; -use itertools::multiunzip; /// Get all files as well as the file level summary statistics (no statistic for partition columns). /// If the optional `limit` is provided, includes only sufficient files. Needed to read up to @@ -35,7 +36,7 @@ use itertools::multiunzip; /// `ListingTable`. 
If it is false we only construct bare statistics and skip a potentially expensive /// call to `multiunzip` for constructing file level summary statistics. pub async fn get_statistics_with_limit( - all_files: impl Stream>, + all_files: impl Stream)>>, file_schema: SchemaRef, limit: Option, collect_stats: bool, @@ -48,9 +49,7 @@ pub async fn get_statistics_with_limit( // - zero for summations, and // - neutral element for extreme points. let size = file_schema.fields().len(); - let mut null_counts: Vec> = vec![Precision::Absent; size]; - let mut max_values: Vec> = vec![Precision::Absent; size]; - let mut min_values: Vec> = vec![Precision::Absent; size]; + let mut col_stats_set = vec![ColumnStatistics::default(); size]; let mut num_rows = Precision::::Absent; let mut total_byte_size = Precision::::Absent; @@ -58,16 +57,19 @@ pub async fn get_statistics_with_limit( let mut all_files = Box::pin(all_files.fuse()); if let Some(first_file) = all_files.next().await { - let (file, file_stats) = first_file?; + let (mut file, file_stats) = first_file?; + file.statistics = Some(file_stats.as_ref().clone()); result_files.push(file); // First file, we set them directly from the file statistics. - num_rows = file_stats.num_rows; - total_byte_size = file_stats.total_byte_size; - for (index, file_column) in file_stats.column_statistics.into_iter().enumerate() { - null_counts[index] = file_column.null_count; - max_values[index] = file_column.max_value; - min_values[index] = file_column.min_value; + num_rows = file_stats.num_rows.clone(); + total_byte_size = file_stats.total_byte_size.clone(); + for (index, file_column) in + file_stats.column_statistics.clone().into_iter().enumerate() + { + col_stats_set[index].null_count = file_column.null_count; + col_stats_set[index].max_value = file_column.max_value; + col_stats_set[index].min_value = file_column.min_value; } // If the number of rows exceeds the limit, we can stop processing @@ -80,7 +82,8 @@ pub async fn get_statistics_with_limit( }; if conservative_num_rows <= limit.unwrap_or(usize::MAX) { while let Some(current) = all_files.next().await { - let (file, file_stats) = current?; + let (mut file, file_stats) = current?; + file.statistics = Some(file_stats.as_ref().clone()); result_files.push(file); if !collect_stats { continue; @@ -90,38 +93,28 @@ pub async fn get_statistics_with_limit( // counts across all the files in question. If any file does not // provide any information or provides an inexact value, we demote // the statistic precision to inexact. 
- num_rows = add_row_stats(file_stats.num_rows, num_rows); + num_rows = add_row_stats(file_stats.num_rows.clone(), num_rows); total_byte_size = - add_row_stats(file_stats.total_byte_size, total_byte_size); + add_row_stats(file_stats.total_byte_size.clone(), total_byte_size); - (null_counts, max_values, min_values) = multiunzip( - izip!( - file_stats.column_statistics.into_iter(), - null_counts.into_iter(), - max_values.into_iter(), - min_values.into_iter() - ) - .map( - |( - ColumnStatistics { - null_count: file_nc, - max_value: file_max, - min_value: file_min, - distinct_count: _, - }, - null_count, - max_value, - min_value, - )| { - ( - add_row_stats(file_nc, null_count), - set_max_if_greater(file_max, max_value), - set_min_if_lesser(file_min, min_value), - ) - }, - ), - ); + for (file_col_stats, col_stats) in file_stats + .column_statistics + .iter() + .zip(col_stats_set.iter_mut()) + { + let ColumnStatistics { + null_count: file_nc, + max_value: file_max, + min_value: file_min, + distinct_count: _, + } = file_col_stats; + + col_stats.null_count = + add_row_stats(file_nc.clone(), col_stats.null_count.clone()); + set_max_if_greater(file_max, &mut col_stats.max_value); + set_min_if_lesser(file_min, &mut col_stats.min_value) + } // If the number of rows exceeds the limit, we can stop processing // files. This only applies when we know the number of rows. It also @@ -139,7 +132,7 @@ pub async fn get_statistics_with_limit( let mut statistics = Statistics { num_rows, total_byte_size, - column_statistics: get_col_stats_vec(null_counts, max_values, min_values), + column_statistics: col_stats_set, }; if all_files.next().await.is_some() { // If we still have files in the stream, it means that the limit kicked @@ -182,21 +175,6 @@ fn add_row_stats( } } -pub(crate) fn get_col_stats_vec( - null_counts: Vec>, - max_values: Vec>, - min_values: Vec>, -) -> Vec { - izip!(null_counts, max_values, min_values) - .map(|(null_count, max_value, min_value)| ColumnStatistics { - null_count, - max_value, - min_value, - distinct_count: Precision::Absent, - }) - .collect() -} - pub(crate) fn get_col_stats( schema: &Schema, null_counts: Vec>, @@ -238,45 +216,61 @@ fn min_max_aggregate_data_type(input_type: &DataType) -> &DataType { /// If the given value is numerically greater than the original maximum value, /// return the new maximum value with appropriate exactness information. 
fn set_max_if_greater( - max_nominee: Precision, - max_values: Precision, -) -> Precision { - match (&max_values, &max_nominee) { - (Precision::Exact(val1), Precision::Exact(val2)) if val1 < val2 => max_nominee, + max_nominee: &Precision, + max_value: &mut Precision, +) { + match (&max_value, max_nominee) { + (Precision::Exact(val1), Precision::Exact(val2)) if val1 < val2 => { + *max_value = max_nominee.clone(); + } (Precision::Exact(val1), Precision::Inexact(val2)) | (Precision::Inexact(val1), Precision::Inexact(val2)) | (Precision::Inexact(val1), Precision::Exact(val2)) if val1 < val2 => { - max_nominee.to_inexact() + *max_value = max_nominee.clone().to_inexact(); + } + (Precision::Exact(_), Precision::Absent) => { + let exact_max = mem::take(max_value); + *max_value = exact_max.to_inexact(); + } + (Precision::Absent, Precision::Exact(_)) => { + *max_value = max_nominee.clone().to_inexact(); + } + (Precision::Absent, Precision::Inexact(_)) => { + *max_value = max_nominee.clone(); } - (Precision::Exact(_), Precision::Absent) => max_values.to_inexact(), - (Precision::Absent, Precision::Exact(_)) => max_nominee.to_inexact(), - (Precision::Absent, Precision::Inexact(_)) => max_nominee, - (Precision::Absent, Precision::Absent) => Precision::Absent, - _ => max_values, + _ => {} } } /// If the given value is numerically lesser than the original minimum value, /// return the new minimum value with appropriate exactness information. fn set_min_if_lesser( - min_nominee: Precision, - min_values: Precision, -) -> Precision { - match (&min_values, &min_nominee) { - (Precision::Exact(val1), Precision::Exact(val2)) if val1 > val2 => min_nominee, + min_nominee: &Precision, + min_value: &mut Precision, +) { + match (&min_value, min_nominee) { + (Precision::Exact(val1), Precision::Exact(val2)) if val1 > val2 => { + *min_value = min_nominee.clone(); + } (Precision::Exact(val1), Precision::Inexact(val2)) | (Precision::Inexact(val1), Precision::Inexact(val2)) | (Precision::Inexact(val1), Precision::Exact(val2)) if val1 > val2 => { - min_nominee.to_inexact() + *min_value = min_nominee.clone().to_inexact(); + } + (Precision::Exact(_), Precision::Absent) => { + let exact_min = mem::take(min_value); + *min_value = exact_min.to_inexact(); + } + (Precision::Absent, Precision::Exact(_)) => { + *min_value = min_nominee.clone().to_inexact(); + } + (Precision::Absent, Precision::Inexact(_)) => { + *min_value = min_nominee.clone(); } - (Precision::Exact(_), Precision::Absent) => min_values.to_inexact(), - (Precision::Absent, Precision::Exact(_)) => min_nominee.to_inexact(), - (Precision::Absent, Precision::Inexact(_)) => min_nominee, - (Precision::Absent, Precision::Absent) => Precision::Absent, - _ => min_values, + _ => {} } } From 117ab1b8c861f431b2d04443e2028ca16fafc57c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Wed, 7 Aug 2024 16:39:24 +0800 Subject: [PATCH 237/357] Add `LogicalPlan::CreateIndex` (#11817) * Add create index plan * Fix clippy lints --- datafusion/core/src/execution/context/mod.rs | 29 ++++++---- datafusion/expr/src/logical_plan/ddl.rs | 19 ++++++ datafusion/expr/src/logical_plan/mod.rs | 4 +- datafusion/expr/src/logical_plan/tree_node.rs | 1 + datafusion/proto/src/logical_plan/mod.rs | 3 + datafusion/sql/src/statement.rs | 58 +++++++++++++++---- datafusion/sql/tests/sql_integration.rs | 34 ++++++++++- 7 files changed, 121 insertions(+), 27 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 
e6bb1483e2565..c883b7a47fbbc 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -544,30 +544,35 @@ impl SessionContext { // stack overflows. match ddl { DdlStatement::CreateExternalTable(cmd) => { - Box::pin(async move { self.create_external_table(&cmd).await }) - as std::pin::Pin + Send>> + (Box::pin(async move { self.create_external_table(&cmd).await }) + as std::pin::Pin + Send>>) + .await } DdlStatement::CreateMemoryTable(cmd) => { - Box::pin(self.create_memory_table(cmd)) + Box::pin(self.create_memory_table(cmd)).await + } + DdlStatement::CreateView(cmd) => { + Box::pin(self.create_view(cmd)).await } - DdlStatement::CreateView(cmd) => Box::pin(self.create_view(cmd)), DdlStatement::CreateCatalogSchema(cmd) => { - Box::pin(self.create_catalog_schema(cmd)) + Box::pin(self.create_catalog_schema(cmd)).await } DdlStatement::CreateCatalog(cmd) => { - Box::pin(self.create_catalog(cmd)) + Box::pin(self.create_catalog(cmd)).await } - DdlStatement::DropTable(cmd) => Box::pin(self.drop_table(cmd)), - DdlStatement::DropView(cmd) => Box::pin(self.drop_view(cmd)), + DdlStatement::DropTable(cmd) => Box::pin(self.drop_table(cmd)).await, + DdlStatement::DropView(cmd) => Box::pin(self.drop_view(cmd)).await, DdlStatement::DropCatalogSchema(cmd) => { - Box::pin(self.drop_schema(cmd)) + Box::pin(self.drop_schema(cmd)).await } DdlStatement::CreateFunction(cmd) => { - Box::pin(self.create_function(cmd)) + Box::pin(self.create_function(cmd)).await + } + DdlStatement::DropFunction(cmd) => { + Box::pin(self.drop_function(cmd)).await } - DdlStatement::DropFunction(cmd) => Box::pin(self.drop_function(cmd)), + ddl => Ok(DataFrame::new(self.state(), LogicalPlan::Ddl(ddl))), } - .await } // TODO what about the other statements (like TransactionStart and TransactionEnd) LogicalPlan::Statement(Statement::SetVariable(stmt)) => { diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 255bf4699b7f5..ad0fcd2d47712 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -41,6 +41,8 @@ pub enum DdlStatement { CreateCatalogSchema(CreateCatalogSchema), /// Creates a new catalog (aka "Database"). CreateCatalog(CreateCatalog), + /// Creates a new index. + CreateIndex(CreateIndex), /// Drops a table. DropTable(DropTable), /// Drops a view. @@ -66,6 +68,7 @@ impl DdlStatement { schema } DdlStatement::CreateCatalog(CreateCatalog { schema, .. }) => schema, + DdlStatement::CreateIndex(CreateIndex { schema, .. }) => schema, DdlStatement::DropTable(DropTable { schema, .. }) => schema, DdlStatement::DropView(DropView { schema, .. }) => schema, DdlStatement::DropCatalogSchema(DropCatalogSchema { schema, .. }) => schema, @@ -83,6 +86,7 @@ impl DdlStatement { DdlStatement::CreateView(_) => "CreateView", DdlStatement::CreateCatalogSchema(_) => "CreateCatalogSchema", DdlStatement::CreateCatalog(_) => "CreateCatalog", + DdlStatement::CreateIndex(_) => "CreateIndex", DdlStatement::DropTable(_) => "DropTable", DdlStatement::DropView(_) => "DropView", DdlStatement::DropCatalogSchema(_) => "DropCatalogSchema", @@ -101,6 +105,7 @@ impl DdlStatement { vec![input] } DdlStatement::CreateView(CreateView { input, .. 
}) => vec![input], + DdlStatement::CreateIndex(_) => vec![], DdlStatement::DropTable(_) => vec![], DdlStatement::DropView(_) => vec![], DdlStatement::DropCatalogSchema(_) => vec![], @@ -147,6 +152,9 @@ impl DdlStatement { }) => { write!(f, "CreateCatalog: {catalog_name:?}") } + DdlStatement::CreateIndex(CreateIndex { name, .. }) => { + write!(f, "CreateIndex: {name:?}") + } DdlStatement::DropTable(DropTable { name, if_exists, .. }) => { @@ -351,3 +359,14 @@ pub struct DropFunction { pub if_exists: bool, pub schema: DFSchemaRef, } + +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub struct CreateIndex { + pub name: Option, + pub table: TableReference, + pub using: Option, + pub columns: Vec, + pub unique: bool, + pub if_not_exists: bool, + pub schema: DFSchemaRef, +} diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 8928f70cd5d27..b58208591920b 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -30,8 +30,8 @@ pub use builder::{ }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, - CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DropCatalogSchema, - DropFunction, DropTable, DropView, OperateFunctionArg, + CreateFunctionBody, CreateIndex, CreateMemoryTable, CreateView, DdlStatement, + DropCatalogSchema, DropFunction, DropTable, DropView, OperateFunctionArg, }; pub use dml::{DmlStatement, WriteOp}; pub use plan::{ diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index a47906f203221..dbe43128fd384 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -303,6 +303,7 @@ impl TreeNode for LogicalPlan { DdlStatement::CreateExternalTable(_) | DdlStatement::CreateCatalogSchema(_) | DdlStatement::CreateCatalog(_) + | DdlStatement::CreateIndex(_) | DdlStatement::DropTable(_) | DdlStatement::DropView(_) | DdlStatement::DropCatalogSchema(_) diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 0a91babdfb609..bc019725f36c6 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1645,6 +1645,9 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(_)) => Err(proto_error( "LogicalPlan serde is not yet implemented for CreateMemoryTable", )), + LogicalPlan::Ddl(DdlStatement::CreateIndex(_)) => Err(proto_error( + "LogicalPlan serde is not yet implemented for CreateIndex", + )), LogicalPlan::Ddl(DdlStatement::DropTable(_)) => Err(proto_error( "LogicalPlan serde is not yet implemented for DropTable", )), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 3737e1adf8f3c..6d47232ec2700 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -45,20 +45,20 @@ use datafusion_expr::utils::expr_to_columns; use datafusion_expr::{ cast, col, Analyze, CreateCatalog, CreateCatalogSchema, CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody, - CreateMemoryTable, CreateView, DescribeTable, DmlStatement, DropCatalogSchema, - DropFunction, DropTable, DropView, EmptyRelation, Explain, Expr, ExprSchemable, - Filter, LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, - SetVariable, Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, - TransactionConclusion, TransactionEnd, 
TransactionIsolationLevel, TransactionStart, - Volatility, WriteOp, + CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, DescribeTable, + DmlStatement, DropCatalogSchema, DropFunction, DropTable, DropView, EmptyRelation, + Explain, Expr, ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, + OperateFunctionArg, PlanType, Prepare, SetVariable, Statement as PlanStatement, + ToStringifiedPlan, TransactionAccessMode, TransactionConclusion, TransactionEnd, + TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; use sqlparser::ast; use sqlparser::ast::{ - Assignment, AssignmentTarget, ColumnDef, CreateTable, CreateTableOptions, Delete, - DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, - OneOrManyWithParens, Query, SchemaName, SetExpr, ShowCreateObject, - ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, - TransactionMode, UnaryOperator, Value, + Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, + CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, + ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, + ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, + TableWithJoins, TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -769,6 +769,42 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { exec_err!("Function name not provided") } } + Statement::CreateIndex(CreateIndex { + name, + table_name, + using, + columns, + unique, + if_not_exists, + .. + }) => { + let name: Option = name.as_ref().map(object_name_to_string); + let table = self.object_name_to_table_reference(table_name)?; + let table_schema = self + .context_provider + .get_table_source(table.clone())? + .schema() + .to_dfschema_ref()?; + let using: Option = using.as_ref().map(ident_to_string); + let columns = self.order_by_to_sort_expr( + columns, + &table_schema, + planner_context, + false, + None, + )?; + Ok(LogicalPlan::Ddl(DdlStatement::CreateIndex( + PlanCreateIndex { + name, + table, + using, + columns, + unique, + if_not_exists, + schema: DFSchemaRef::new(DFSchema::empty()), + }, + ))) + } _ => { not_impl_err!("Unsupported SQL statement: {sql:?}") } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 8a5510eb69f3e..4d7e60805657c 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -28,11 +28,12 @@ use datafusion_common::{ assert_contains, DataFusionError, ParamValues, Result, ScalarValue, }; use datafusion_expr::{ + col, dml::CopyTo, logical_plan::{LogicalPlan, Prepare}, test::function_stub::sum_udaf, - ColumnarValue, CreateExternalTable, DdlStatement, ScalarUDF, ScalarUDFImpl, - Signature, Volatility, + ColumnarValue, CreateExternalTable, CreateIndex, DdlStatement, ScalarUDF, + ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -4426,6 +4427,35 @@ fn test_parse_escaped_string_literal_value() { ) } +#[test] +fn plan_create_index() { + let sql = + "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test USING btree (name, age DESC)"; + let plan = logical_plan_with_options(sql, ParserOptions::default()).unwrap(); + match plan { + LogicalPlan::Ddl(DdlStatement::CreateIndex(CreateIndex { + name, + table, + using, + columns, + unique, + if_not_exists, + .. 
+ })) => { + assert_eq!(name, Some("idx_name".to_string())); + assert_eq!(format!("{table}"), "test"); + assert_eq!(using, Some("btree".to_string())); + assert_eq!( + columns, + vec![col("name").sort(true, false), col("age").sort(false, true),] + ); + assert!(unique); + assert!(if_not_exists); + } + _ => panic!("wrong plan type"), + } +} + fn assert_field_not_found(err: DataFusionError, name: &str) { match err { DataFusionError::SchemaError { .. } => { From 1ecdf900786844f09533e91f7e8fd52170ae6ae6 Mon Sep 17 00:00:00 2001 From: danlgrca Date: Wed, 7 Aug 2024 07:14:35 -0400 Subject: [PATCH 238/357] Update `object_store` to 0.10.2 (#11860) As part of mitigating https://nvd.nist.gov/vuln/detail/CVE-2024-41178, `object_store` should be upgraded to the no-longer-vulnerable version 0.10.2 instead of 0.10.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 38b5627bc1872..e1bd0d7aa72b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,7 +116,7 @@ indexmap = "2.0.0" itertools = "0.12" log = "^0.4" num_cpus = "1.13.0" -object_store = { version = "0.10.1", default-features = false } +object_store = { version = "0.10.2", default-features = false } parking_lot = "0.12" parquet = { version = "52.2.0", default-features = false, features = [ "arrow", From 679a85f808ce130ff354c762d29315eb2dc32b3f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 7 Aug 2024 12:59:06 -0400 Subject: [PATCH 239/357] Add metrics for skipped rows (#11706) --- .../physical-plan/src/aggregates/row_hash.rs | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 1b84befb02694..ed3d6d49f9f3d 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -28,12 +28,12 @@ use crate::aggregates::{ PhysicalGroupBy, }; use crate::common::IPCWriter; -use crate::metrics::{BaselineMetrics, RecordOutput}; +use crate::metrics::{BaselineMetrics, MetricBuilder, RecordOutput}; use crate::sorts::sort::sort_batch; use crate::sorts::streaming_merge; use crate::spill::read_spill_as_stream; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, ExecutionPlan, PhysicalExpr}; +use crate::{aggregates, metrics, ExecutionPlan, PhysicalExpr}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; @@ -117,10 +117,22 @@ struct SkipAggregationProbe { /// Flag indicating that further updates of `SkipAggregationProbe` /// state won't make any effect is_locked: bool, + + /// Number of rows where state was output without aggregation. 
+ /// + /// * If 0, all input rows were aggregated (should_skip was always false) + /// + /// * if greater than zero, the number of rows which were output directly + /// without aggregation + skipped_aggregation_rows: metrics::Count, } impl SkipAggregationProbe { - fn new(probe_rows_threshold: usize, probe_ratio_threshold: f64) -> Self { + fn new( + probe_rows_threshold: usize, + probe_ratio_threshold: f64, + skipped_aggregation_rows: metrics::Count, + ) -> Self { Self { input_rows: 0, num_groups: 0, @@ -128,6 +140,7 @@ impl SkipAggregationProbe { probe_ratio_threshold, should_skip: false, is_locked: false, + skipped_aggregation_rows, } } @@ -160,6 +173,11 @@ impl SkipAggregationProbe { self.should_skip = false; self.is_locked = true; } + + /// Record the number of rows that were output directly without aggregation + fn record_skipped(&mut self, batch: &RecordBatch) { + self.skipped_aggregation_rows.add(batch.num_rows()); + } } /// HashTable based Grouping Aggregator @@ -473,17 +491,17 @@ impl GroupedHashAggregateStream { .all(|acc| acc.supports_convert_to_state()) && agg_group_by.is_single() { + let options = &context.session_config().options().execution; + let probe_rows_threshold = + options.skip_partial_aggregation_probe_rows_threshold; + let probe_ratio_threshold = + options.skip_partial_aggregation_probe_ratio_threshold; + let skipped_aggregation_rows = MetricBuilder::new(&agg.metrics) + .counter("skipped_aggregation_rows", partition); Some(SkipAggregationProbe::new( - context - .session_config() - .options() - .execution - .skip_partial_aggregation_probe_rows_threshold, - context - .session_config() - .options() - .execution - .skip_partial_aggregation_probe_ratio_threshold, + probe_rows_threshold, + probe_ratio_threshold, + skipped_aggregation_rows, )) } else { None @@ -611,6 +629,9 @@ impl Stream for GroupedHashAggregateStream { match ready!(self.input.poll_next_unpin(cx)) { Some(Ok(batch)) => { let _timer = elapsed_compute.timer(); + if let Some(probe) = self.skip_aggregation_probe.as_mut() { + probe.record_skipped(&batch); + } let states = self.transform_to_states(batch)?; return Poll::Ready(Some(Ok( states.record_output(&self.baseline_metrics) From cd322f11455c2a95634268bb6064d1e888abdfec Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 7 Aug 2024 13:32:55 -0600 Subject: [PATCH 240/357] feat: Expose public method for optimizing physical plans (#11879) * expose public method for optimizing physical plans using the default planner * cargo fmt --- datafusion-examples/examples/planner_api.rs | 16 ++++++++++++++++ datafusion/core/src/physical_planner.rs | 6 +++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/datafusion-examples/examples/planner_api.rs b/datafusion-examples/examples/planner_api.rs index 92b58bcee1974..35cf766ba1afe 100644 --- a/datafusion-examples/examples/planner_api.rs +++ b/datafusion-examples/examples/planner_api.rs @@ -17,6 +17,7 @@ use datafusion::error::Result; use datafusion::physical_plan::displayable; +use datafusion::physical_planner::DefaultPhysicalPlanner; use datafusion::prelude::*; use datafusion_expr::{LogicalPlan, PlanType}; @@ -123,5 +124,20 @@ async fn to_physical_plan_step_by_step_demo( .plan ); + // Call the physical optimizer with an existing physical plan (in this + // case the plan is already optimized, but an unoptimized plan would + // typically be used in this context) + // Note that this is not part of the trait but a public method + // on DefaultPhysicalPlanner. Not all planners will provide this feature. 
+ let planner = DefaultPhysicalPlanner::default(); + let physical_plan = + planner.optimize_physical_plan(physical_plan, &ctx.state(), |_, _| {})?; + println!( + "Optimized physical plan:\n\n{}\n\n", + displayable(physical_plan.as_ref()) + .to_stringified(false, PlanType::InitialPhysicalPlan) + .plan + ); + Ok(()) } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 65cdbf9fe62ce..58b02c08e34cd 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -180,7 +180,7 @@ impl PhysicalPlanner for DefaultPhysicalPlanner { .create_initial_plan(logical_plan, session_state) .await?; - self.optimize_internal(plan, session_state, |_, _| {}) + self.optimize_physical_plan(plan, session_state, |_, _| {}) } } } @@ -1732,7 +1732,7 @@ impl DefaultPhysicalPlanner { } } - let optimized_plan = self.optimize_internal( + let optimized_plan = self.optimize_physical_plan( input, session_state, |plan, optimizer| { @@ -1816,7 +1816,7 @@ impl DefaultPhysicalPlanner { /// Optimize a physical plan by applying each physical optimizer, /// calling observer(plan, optimizer after each one) - fn optimize_internal( + pub fn optimize_physical_plan( &self, plan: Arc, session_state: &SessionState, From 60d1d3a702be9c95f14087337c693cd678d01dea Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Thu, 8 Aug 2024 03:35:57 +0530 Subject: [PATCH 241/357] Cast `Utf8View` to `Utf8` to support `||` from `StringViewArray` (#11796) * make query work * hack string_concat_coercion * more tests --- datafusion/expr/src/type_coercion/binary.rs | 23 +++++--- .../sqllogictest/test_files/string_view.slt | 53 +++++++++++++++++++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 17280289ed1b4..8da33081d6525 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -890,15 +890,22 @@ fn dictionary_coercion( /// 2. Data type of the other side should be able to cast to string type fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { - (Utf8, from_type) | (from_type, Utf8) => { - string_concat_internal_coercion(from_type, &Utf8) - } - (LargeUtf8, from_type) | (from_type, LargeUtf8) => { - string_concat_internal_coercion(from_type, &LargeUtf8) + match (lhs_type, rhs_type) { + // If Utf8View is in any side, we coerce to Utf8. 
+ // Ref: https://github.com/apache/datafusion/pull/11796 + (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => { + Some(Utf8) } - _ => None, - }) + _ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { + (Utf8, from_type) | (from_type, Utf8) => { + string_concat_internal_coercion(from_type, &Utf8) + } + (LargeUtf8, from_type) | (from_type, LargeUtf8) => { + string_concat_internal_coercion(from_type, &LargeUtf8) + } + _ => None, + }), + } } fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 584d3b3306904..4d3f72b1e8d4e 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -447,3 +447,56 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; statement ok drop table dates; + +statement ok +create table temp as values +('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')), +('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View')); + +query T +select column2||' is fast' from temp; +---- +rust is fast +datafusion is fast + + +query T +select column2 || ' is ' || column3 from temp; +---- +rust is fast +datafusion is cool + +query TT +explain select column2 || 'is' || column3 from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8) +02)--TableScan: temp projection=[column2, column3] + + +query TT +explain select column2||' is fast' from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast") +02)--TableScan: temp projection=[column2] + + +query T +select column2||column3 from temp; +---- +rustfast +datafusioncool + +query TT +explain select column2||column3 from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8) +02)--TableScan: temp projection=[column2, column3] + +query T +select column2|| ' ' ||column3 from temp; +---- +rust fast +datafusion cool From d0a1d3030a70eaba00eb856ce864c6a1cee02365 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Thu, 8 Aug 2024 09:45:43 +0800 Subject: [PATCH 242/357] refactor: remove unneed mut for session context (#11864) * doc: remove mut from session context docstring * refactor: remove unnecessary mut for session context * refactor: remove more unused mut --- .../examples/cli-session-context.rs | 6 ++--- datafusion-cli/src/catalog.rs | 2 +- datafusion-cli/src/command.rs | 2 +- datafusion-cli/src/exec.rs | 16 ++++++------ datafusion-cli/src/main.rs | 10 +++---- datafusion-examples/examples/catalog.rs | 2 +- datafusion/core/benches/filter_query_sql.rs | 14 +++++----- datafusion/core/src/dataframe/mod.rs | 14 +++++----- datafusion/core/src/dataframe/parquet.rs | 10 +++---- datafusion/core/src/execution/context/mod.rs | 13 ++++++---- datafusion/core/src/test_util/mod.rs | 6 ++--- .../tests/user_defined/user_defined_plan.rs | 26 +++++++++---------- .../tests/cases/roundtrip_logical_plan.rs | 4 ++- .../custom-table-providers.md | 2 +- 14 files changed, 65 insertions(+), 62 deletions(-) diff --git a/datafusion-cli/examples/cli-session-context.rs b/datafusion-cli/examples/cli-session-context.rs index 8da52ed84a5f8..1a8f15c8731b2 100644 --- a/datafusion-cli/examples/cli-session-context.rs +++ b/datafusion-cli/examples/cli-session-context.rs @@ -82,7 +82,7 @@ impl CliSessionContext for MyUnionerContext { 
#[tokio::main] /// Runs the example. pub async fn main() { - let mut my_ctx = MyUnionerContext::default(); + let my_ctx = MyUnionerContext::default(); let mut print_options = PrintOptions { format: datafusion_cli::print_format::PrintFormat::Automatic, @@ -91,7 +91,5 @@ pub async fn main() { color: true, }; - exec_from_repl(&mut my_ctx, &mut print_options) - .await - .unwrap(); + exec_from_repl(&my_ctx, &mut print_options).await.unwrap(); } diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index 273eb30d3a710..c4636f1ce0e00 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -240,7 +240,7 @@ mod tests { use datafusion::prelude::SessionContext; fn setup_context() -> (SessionContext, Arc) { - let mut ctx = SessionContext::new(); + let ctx = SessionContext::new(); ctx.register_catalog_list(Arc::new(DynamicFileCatalog::new( ctx.state().catalog_list().clone(), ctx.state_weak_ref(), diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 1a6c023d3b505..05c00d634c942 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -55,7 +55,7 @@ pub enum OutputFormat { impl Command { pub async fn execute( &self, - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, print_options: &mut PrintOptions, ) -> Result<()> { match self { diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index b78f32e0ac486..178bce6f2fe65 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -49,7 +49,7 @@ use tokio::signal; /// run and execute SQL statements and commands, against a context with the given print options pub async fn exec_from_commands( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, commands: Vec, print_options: &PrintOptions, ) -> Result<()> { @@ -62,7 +62,7 @@ pub async fn exec_from_commands( /// run and execute SQL statements and commands from a file, against a context with the given print options pub async fn exec_from_lines( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, reader: &mut BufReader, print_options: &PrintOptions, ) -> Result<()> { @@ -102,7 +102,7 @@ pub async fn exec_from_lines( } pub async fn exec_from_files( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, files: Vec, print_options: &PrintOptions, ) -> Result<()> { @@ -121,7 +121,7 @@ pub async fn exec_from_files( /// run and execute SQL statements and commands against a context with the given print options pub async fn exec_from_repl( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, print_options: &mut PrintOptions, ) -> rustyline::Result<()> { let mut rl = Editor::new()?; @@ -204,7 +204,7 @@ pub async fn exec_from_repl( } pub(super) async fn exec_and_print( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, print_options: &PrintOptions, sql: String, ) -> Result<()> { @@ -300,7 +300,7 @@ fn config_file_type_from_str(ext: &str) -> Option { } async fn create_plan( - ctx: &mut dyn CliSessionContext, + ctx: &dyn CliSessionContext, statement: Statement, ) -> Result { let mut plan = ctx.session_state().statement_to_plan(statement).await?; @@ -473,7 +473,7 @@ mod tests { "cos://bucket/path/file.parquet", "gcs://bucket/path/file.parquet", ]; - let mut ctx = SessionContext::new(); + let ctx = SessionContext::new(); let task_ctx = ctx.task_ctx(); let dialect = &task_ctx.session_config().options().sql_parser.dialect; let dialect = dialect_from_str(dialect).ok_or_else(|| { @@ -488,7 +488,7 @@ mod tests { let statements 
= DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?; for statement in statements { //Should not fail - let mut plan = create_plan(&mut ctx, statement).await?; + let mut plan = create_plan(&ctx, statement).await?; if let LogicalPlan::Copy(copy_to) = &mut plan { assert_eq!(copy_to.output_url, location); assert_eq!(copy_to.file_type.get_ext(), "parquet".to_string()); diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 6266ae6f561ae..1810d3cef57cd 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -175,7 +175,7 @@ async fn main_inner() -> Result<()> { let runtime_env = create_runtime_env(rt_config.clone())?; - let mut ctx = + let ctx = SessionContext::new_with_config_rt(session_config.clone(), Arc::new(runtime_env)); ctx.refresh_catalogs().await?; // install dynamic catalog provider that knows how to open files @@ -212,20 +212,20 @@ async fn main_inner() -> Result<()> { if commands.is_empty() && files.is_empty() { if !rc.is_empty() { - exec::exec_from_files(&mut ctx, rc, &print_options).await?; + exec::exec_from_files(&ctx, rc, &print_options).await?; } // TODO maybe we can have thiserror for cli but for now let's keep it simple - return exec::exec_from_repl(&mut ctx, &mut print_options) + return exec::exec_from_repl(&ctx, &mut print_options) .await .map_err(|e| DataFusionError::External(Box::new(e))); } if !files.is_empty() { - exec::exec_from_files(&mut ctx, files, &print_options).await?; + exec::exec_from_files(&ctx, files, &print_options).await?; } if !commands.is_empty() { - exec::exec_from_commands(&mut ctx, commands, &print_options).await?; + exec::exec_from_commands(&ctx, commands, &print_options).await?; } Ok(()) diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs index f9ead592c7eac..f770056026ed4 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/catalog.rs @@ -44,7 +44,7 @@ async fn main() -> Result<()> { let dir_a = prepare_example_data()?; let dir_b = prepare_example_data()?; - let mut ctx = SessionContext::new(); + let ctx = SessionContext::new(); let state = ctx.state(); let catlist = Arc::new(CustomCatalogProviderList::new()); diff --git a/datafusion/core/benches/filter_query_sql.rs b/datafusion/core/benches/filter_query_sql.rs index 01adc357b39af..0e09ae09d7c2e 100644 --- a/datafusion/core/benches/filter_query_sql.rs +++ b/datafusion/core/benches/filter_query_sql.rs @@ -27,7 +27,7 @@ use futures::executor::block_on; use std::sync::Arc; use tokio::runtime::Runtime; -async fn query(ctx: &mut SessionContext, sql: &str) { +async fn query(ctx: &SessionContext, sql: &str) { let rt = Runtime::new().unwrap(); // execute the query @@ -70,25 +70,25 @@ fn criterion_benchmark(c: &mut Criterion) { let batch_size = 4096; // 2^12 c.bench_function("filter_array", |b| { - let mut ctx = create_context(array_len, batch_size).unwrap(); - b.iter(|| block_on(query(&mut ctx, "select f32, f64 from t where f32 >= f64"))) + let ctx = create_context(array_len, batch_size).unwrap(); + b.iter(|| block_on(query(&ctx, "select f32, f64 from t where f32 >= f64"))) }); c.bench_function("filter_scalar", |b| { - let mut ctx = create_context(array_len, batch_size).unwrap(); + let ctx = create_context(array_len, batch_size).unwrap(); b.iter(|| { block_on(query( - &mut ctx, + &ctx, "select f32, f64 from t where f32 >= 250 and f64 > 250", )) }) }); c.bench_function("filter_scalar in list", |b| { - let mut ctx = create_context(array_len, batch_size).unwrap(); + let ctx = 
create_context(array_len, batch_size).unwrap(); b.iter(|| { block_on(query( - &mut ctx, + &ctx, "select f32, f64 from t where f32 in (10, 20, 30, 40)", )) }) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index c4c5a4aa08342..cc1a63cc05f7a 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1550,7 +1550,7 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// # use datafusion_common::ScalarValue; - /// let mut ctx = SessionContext::new(); + /// let ctx = SessionContext::new(); /// # ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; /// let results = ctx /// .sql("SELECT a FROM example WHERE b = $1") @@ -2649,8 +2649,8 @@ mod tests { #[tokio::test] async fn registry() -> Result<()> { - let mut ctx = SessionContext::new(); - register_aggregate_csv(&mut ctx, "aggregate_test_100").await?; + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx, "aggregate_test_100").await?; // declare the udf let my_fn: ScalarFunctionImplementation = @@ -2783,8 +2783,8 @@ mod tests { /// Create a logical plan from a SQL query async fn create_plan(sql: &str) -> Result { - let mut ctx = SessionContext::new(); - register_aggregate_csv(&mut ctx, "aggregate_test_100").await?; + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx, "aggregate_test_100").await?; Ok(ctx.sql(sql).await?.into_unoptimized_plan()) } @@ -3147,9 +3147,9 @@ mod tests { "datafusion.sql_parser.enable_ident_normalization".to_owned(), "false".to_owned(), )]))?; - let mut ctx = SessionContext::new_with_config(config); + let ctx = SessionContext::new_with_config(config); let name = "aggregate_test_100"; - register_aggregate_csv(&mut ctx, name).await?; + register_aggregate_csv(&ctx, name).await?; let df = ctx.table(name); let df = df diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index 1abb550f5c98c..2a23f045f3b2b 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -189,14 +189,14 @@ mod tests { async fn write_parquet_with_small_rg_size() -> Result<()> { // This test verifies writing a parquet file with small rg size // relative to datafusion.execution.batch_size does not panic - let mut ctx = SessionContext::new_with_config( - SessionConfig::from_string_hash_map(HashMap::from_iter( + let ctx = SessionContext::new_with_config(SessionConfig::from_string_hash_map( + HashMap::from_iter( [("datafusion.execution.batch_size", "10")] .iter() .map(|(s1, s2)| (s1.to_string(), s2.to_string())), - ))?, - ); - register_aggregate_csv(&mut ctx, "aggregate_test_100").await?; + ), + )?); + register_aggregate_csv(&ctx, "aggregate_test_100").await?; let test_df = ctx.table("aggregate_test_100").await?; let output_path = "file://local/test.parquet"; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index c883b7a47fbbc..c63ffddd81b31 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -179,7 +179,7 @@ where /// # use datafusion::{error::Result, assert_batches_eq}; /// # #[tokio::main] /// # async fn main() -> Result<()> { -/// let mut ctx = SessionContext::new(); +/// let ctx = SessionContext::new(); /// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; /// let results = ctx /// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") @@ -369,7 +369,7 
@@ impl SessionContext { /// # use datafusion_execution::object_store::ObjectStoreUrl; /// let object_store_url = ObjectStoreUrl::parse("file://").unwrap(); /// let object_store = object_store::local::LocalFileSystem::new(); - /// let mut ctx = SessionContext::new(); + /// let ctx = SessionContext::new(); /// // All files with the file:// url prefix will be read from the local file system /// ctx.register_object_store(object_store_url.as_ref(), Arc::new(object_store)); /// ``` @@ -452,7 +452,7 @@ impl SessionContext { /// # use datafusion::{error::Result, assert_batches_eq}; /// # #[tokio::main] /// # async fn main() -> Result<()> { - /// let mut ctx = SessionContext::new(); + /// let ctx = SessionContext::new(); /// ctx /// .sql("CREATE TABLE foo (x INTEGER)") /// .await? @@ -480,7 +480,7 @@ impl SessionContext { /// # use datafusion::physical_plan::collect; /// # #[tokio::main] /// # async fn main() -> Result<()> { - /// let mut ctx = SessionContext::new(); + /// let ctx = SessionContext::new(); /// let options = SQLOptions::new() /// .with_allow_ddl(false); /// let err = ctx.sql_with_options("CREATE TABLE foo (x INTEGER)", options) @@ -1357,7 +1357,7 @@ impl SessionContext { } /// Register [`CatalogProviderList`] in [`SessionState`] - pub fn register_catalog_list(&mut self, catalog_list: Arc) { + pub fn register_catalog_list(&self, catalog_list: Arc) { self.state.write().register_catalog_list(catalog_list) } @@ -1386,15 +1386,18 @@ impl FunctionRegistry for SessionContext { fn udwf(&self, name: &str) -> Result> { self.state.read().udwf(name) } + fn register_udf(&mut self, udf: Arc) -> Result>> { self.state.write().register_udf(udf) } + fn register_udaf( &mut self, udaf: Arc, ) -> Result>> { self.state.write().register_udaf(udaf) } + fn register_udwf(&mut self, udwf: Arc) -> Result>> { self.state.write().register_udwf(udwf) } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 9610a7f203648..937344ef5e4ed 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -112,7 +112,7 @@ pub fn aggr_test_schema() -> SchemaRef { /// Register session context for the aggregate_test_100.csv file pub async fn register_aggregate_csv( - ctx: &mut SessionContext, + ctx: &SessionContext, table_name: &str, ) -> Result<()> { let schema = aggr_test_schema(); @@ -128,8 +128,8 @@ pub async fn register_aggregate_csv( /// Create a table from the aggregate_test_100.csv file with the specified name pub async fn test_table_with_name(name: &str) -> Result { - let mut ctx = SessionContext::new(); - register_aggregate_csv(&mut ctx, name).await?; + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx, name).await?; ctx.table(name).await } diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 47804b927e641..1aa33fc75e5d6 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -103,7 +103,7 @@ use datafusion_optimizer::AnalyzerRule; /// Execute the specified sql and return the resulting record batches /// pretty printed as a String. -async fn exec_sql(ctx: &mut SessionContext, sql: &str) -> Result { +async fn exec_sql(ctx: &SessionContext, sql: &str) -> Result { let df = ctx.sql(sql).await?; let batches = df.collect().await?; pretty_format_batches(&batches) @@ -112,25 +112,25 @@ async fn exec_sql(ctx: &mut SessionContext, sql: &str) -> Result { } /// Create a test table. 
-async fn setup_table(mut ctx: SessionContext) -> Result { +async fn setup_table(ctx: SessionContext) -> Result { let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/data/customer.csv'"; let expected = vec!["++", "++"]; - let s = exec_sql(&mut ctx, sql).await?; + let s = exec_sql(&ctx, sql).await?; let actual = s.lines().collect::>(); assert_eq!(expected, actual, "Creating table"); Ok(ctx) } -async fn setup_table_without_schemas(mut ctx: SessionContext) -> Result { +async fn setup_table_without_schemas(ctx: SessionContext) -> Result { let sql = "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/data/customer.csv'"; let expected = vec!["++", "++"]; - let s = exec_sql(&mut ctx, sql).await?; + let s = exec_sql(&ctx, sql).await?; let actual = s.lines().collect::>(); assert_eq!(expected, actual, "Creating table"); @@ -146,7 +146,7 @@ const QUERY2: &str = "SELECT 42, arrow_typeof(42)"; // Run the query using the specified execution context and compare it // to the known result -async fn run_and_compare_query(mut ctx: SessionContext, description: &str) -> Result<()> { +async fn run_and_compare_query(ctx: SessionContext, description: &str) -> Result<()> { let expected = vec![ "+-------------+---------+", "| customer_id | revenue |", @@ -157,7 +157,7 @@ async fn run_and_compare_query(mut ctx: SessionContext, description: &str) -> Re "+-------------+---------+", ]; - let s = exec_sql(&mut ctx, QUERY).await?; + let s = exec_sql(&ctx, QUERY).await?; let actual = s.lines().collect::>(); assert_eq!( @@ -174,7 +174,7 @@ async fn run_and_compare_query(mut ctx: SessionContext, description: &str) -> Re // Run the query using the specified execution context and compare it // to the known result async fn run_and_compare_query_with_analyzer_rule( - mut ctx: SessionContext, + ctx: SessionContext, description: &str, ) -> Result<()> { let expected = vec![ @@ -185,7 +185,7 @@ async fn run_and_compare_query_with_analyzer_rule( "+------------+--------------------------+", ]; - let s = exec_sql(&mut ctx, QUERY2).await?; + let s = exec_sql(&ctx, QUERY2).await?; let actual = s.lines().collect::>(); assert_eq!( @@ -202,7 +202,7 @@ async fn run_and_compare_query_with_analyzer_rule( // Run the query using the specified execution context and compare it // to the known result async fn run_and_compare_query_with_auto_schemas( - mut ctx: SessionContext, + ctx: SessionContext, description: &str, ) -> Result<()> { let expected = vec![ @@ -215,7 +215,7 @@ async fn run_and_compare_query_with_auto_schemas( "+----------+----------+", ]; - let s = exec_sql(&mut ctx, QUERY1).await?; + let s = exec_sql(&ctx, QUERY1).await?; let actual = s.lines().collect::>(); assert_eq!( @@ -262,13 +262,13 @@ async fn topk_query() -> Result<()> { #[tokio::test] // Run EXPLAIN PLAN and show the plan was in fact rewritten async fn topk_plan() -> Result<()> { - let mut ctx = setup_table(make_topk_context()).await?; + let ctx = setup_table(make_topk_context()).await?; let mut expected = ["| logical_plan after topk | TopK: k=3 |", "| | TableScan: sales projection=[customer_id,revenue] |"].join("\n"); let explain_query = format!("EXPLAIN VERBOSE {QUERY}"); - let actual_output = exec_sql(&mut ctx, &explain_query).await?; + let actual_output = exec_sql(&ctx, &explain_query).await?; // normalize newlines (output on windows uses \r\n) let mut actual_output = actual_output.replace("\r\n", "\n"); diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index b96398ef217f3..e5c226418441a 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -592,7 +592,9 @@ async fn roundtrip_logical_plan_copy_to_parquet() -> Result<()> { // Set specific Parquet format options let mut key_value_metadata = HashMap::new(); key_value_metadata.insert("test".to_string(), Some("test".to_string())); - parquet_format.key_value_metadata = key_value_metadata.clone(); + parquet_format + .key_value_metadata + .clone_from(&key_value_metadata); parquet_format.global.allow_single_file_parallelism = false; parquet_format.global.created_by = "test".to_string(); diff --git a/docs/source/library-user-guide/custom-table-providers.md b/docs/source/library-user-guide/custom-table-providers.md index a250e880913c5..f86cea0bda954 100644 --- a/docs/source/library-user-guide/custom-table-providers.md +++ b/docs/source/library-user-guide/custom-table-providers.md @@ -146,7 +146,7 @@ For filters that can be pushed down, they'll be passed to the `scan` method as t In order to use the custom table provider, we need to register it with DataFusion. This is done by creating a `TableProvider` and registering it with the `SessionContext`. ```rust -let mut ctx = SessionContext::new(); +let ctx = SessionContext::new(); let custom_table_provider = CustomDataSource::new(); ctx.register_table("custom_table", Arc::new(custom_table_provider)); From 053795cd4ec5cbac022fbea767b79f8da1424060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 8 Aug 2024 09:48:09 +0800 Subject: [PATCH 243/357] Improve nested loop join code (#11863) * Improve nested loop join code * fmt --- .../src/joins/nested_loop_join.rs | 90 +++++++++---------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 9f1465c2d7c1a..d69d818331be2 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -47,7 +47,7 @@ use arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; -use datafusion_common::{exec_err, JoinSide, Result, Statistics}; +use datafusion_common::{exec_datafusion_err, JoinSide, Result, Statistics}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; @@ -562,62 +562,54 @@ fn join_left_and_right_batch( schema: &Schema, visited_left_side: &SharedBitmapBuilder, ) -> Result { - let indices_result = (0..left_batch.num_rows()) + let indices = (0..left_batch.num_rows()) .map(|left_row_index| { build_join_indices(left_row_index, right_batch, left_batch, filter) }) - .collect::>>(); + .collect::>>() + .map_err(|e| { + exec_datafusion_err!( + "Fail to build join indices in NestedLoopJoinExec, error:{e}" + ) + })?; let mut left_indices_builder = UInt64Builder::new(); let mut right_indices_builder = UInt32Builder::new(); - let left_right_indices = match indices_result { - Err(err) => { - exec_err!("Fail to build join indices in NestedLoopJoinExec, error:{err}") - } - Ok(indices) => { - for (left_side, right_side) in indices { - left_indices_builder - .append_values(left_side.values(), &vec![true; left_side.len()]); - right_indices_builder - .append_values(right_side.values(), &vec![true; 
right_side.len()]); - } - Ok(( - left_indices_builder.finish(), - right_indices_builder.finish(), - )) - } - }; - match left_right_indices { - Ok((left_side, right_side)) => { - // set the left bitmap - // and only full join need the left bitmap - if need_produce_result_in_final(join_type) { - let mut bitmap = visited_left_side.lock(); - left_side.iter().flatten().for_each(|x| { - bitmap.set_bit(x as usize, true); - }); - } - // adjust the two side indices base on the join type - let (left_side, right_side) = adjust_indices_by_join_type( - left_side, - right_side, - 0..right_batch.num_rows(), - join_type, - false, - ); + for (left_side, right_side) in indices { + left_indices_builder + .append_values(left_side.values(), &vec![true; left_side.len()]); + right_indices_builder + .append_values(right_side.values(), &vec![true; right_side.len()]); + } - build_batch_from_indices( - schema, - left_batch, - right_batch, - &left_side, - &right_side, - column_indices, - JoinSide::Left, - ) - } - Err(e) => Err(e), + let left_side = left_indices_builder.finish(); + let right_side = right_indices_builder.finish(); + // set the left bitmap + // and only full join need the left bitmap + if need_produce_result_in_final(join_type) { + let mut bitmap = visited_left_side.lock(); + left_side.iter().flatten().for_each(|x| { + bitmap.set_bit(x as usize, true); + }); } + // adjust the two side indices base on the join type + let (left_side, right_side) = adjust_indices_by_join_type( + left_side, + right_side, + 0..right_batch.num_rows(), + join_type, + false, + ); + + build_batch_from_indices( + schema, + left_batch, + right_batch, + &left_side, + &right_side, + column_indices, + JoinSide::Left, + ) } fn get_final_indices_from_shared_bitmap( From 0bbce5dce29df1123b0ab87a8907482c72d284c1 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 7 Aug 2024 20:32:57 -0600 Subject: [PATCH 244/357] [Minor]: Refactor to use Result.transpose() (#11882) `Result.transpose()` converts `Result>` to `Option>`. > Ok(None) will be mapped to None. Ok(Some(_)) and Err(_) will be mapped to Some(Ok(_)) and Some(Err(_)). 
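For reference, a minimal self-contained illustration of the quoted behavior
(plain `std` Rust, independent of the DataFusion code touched here; the
bindings and values are arbitrary, and the documentation link is below):

```rust
fn main() {
    // Ok(Some(_)) is mapped to Some(Ok(_))
    let x: Result<Option<i32>, &str> = Ok(Some(5));
    assert_eq!(x.transpose(), Some(Ok(5)));

    // Ok(None) is mapped to None
    let y: Result<Option<i32>, &str> = Ok(None);
    assert_eq!(y.transpose(), None);

    // Err(_) is mapped to Some(Err(_))
    let z: Result<Option<i32>, &str> = Err("boom");
    assert_eq!(z.transpose(), Some(Err("boom")));
}
```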
- https://doc.rust-lang.org/std/result/enum.Result.html#method.transpose --- .../core/src/datasource/physical_plan/arrow_file.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index e720b4efff6f3..a1ee6fbe13412 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -331,11 +331,9 @@ impl FileOpener for ArrowOpener { .into_iter() .zip(recordbatch_results) .filter_map(move |(block, data)| { - match decoder.read_record_batch(&block, &data.into()) { - Ok(Some(record_batch)) => Some(Ok(record_batch)), - Ok(None) => None, - Err(err) => Some(Err(err)), - } + decoder + .read_record_batch(&block, &data.into()) + .transpose() }), ) .boxed()) From 2521043ddcb3895a2010b8e328f3fa10f77fc094 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 8 Aug 2024 12:27:25 +0100 Subject: [PATCH 245/357] support `ANY()` op (#11849) * support ANY() op * use ExprPlanner * revert test changes * add planner tests * minimise diff * fix tests :fingers_crossed: * move error test to slt --- datafusion/expr/src/planner.rs | 7 +++ datafusion/functions-nested/src/array_has.rs | 22 +++++----- datafusion/functions-nested/src/planner.rs | 20 +++++++-- datafusion/sql/src/expr/mod.rs | 46 ++++++++++++++++---- datafusion/sqllogictest/test_files/array.slt | 19 ++++++++ 5 files changed, 92 insertions(+), 22 deletions(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index c775427df1384..24f589c41582c 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -197,6 +197,13 @@ pub trait ExprPlanner: Send + Sync { "Default planner compound identifier hasn't been implemented for ExprPlanner" ) } + + /// Plans `ANY` expression, e.g., `expr = ANY(array_expr)` + /// + /// Returns origin binary expression if not possible + fn plan_any(&self, expr: RawBinaryExpr) -> Result> { + Ok(PlannerResult::Original(expr)) + } } /// An operator with two arguments to plan diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index bdda5a565947e..fe1df25799321 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -34,19 +34,19 @@ use std::sync::Arc; // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!(ArrayHas, array_has, - first_array second_array, // arg name + haystack_array element, // arg names "returns true, if the element appears in the first array, otherwise false.", // doc array_has_udf // internal function name ); make_udf_expr_and_func!(ArrayHasAll, array_has_all, - first_array second_array, // arg name + haystack_array needle_array, // arg names "returns true if each element of the second array appears in the first array; otherwise, it returns false.", // doc array_has_all_udf // internal function name ); make_udf_expr_and_func!(ArrayHasAny, array_has_any, - first_array second_array, // arg name + haystack_array needle_array, // arg names "returns true if at least one element of the second array appears in the first array; otherwise, it returns false.", // doc array_has_any_udf // internal function name ); @@ -262,26 +262,26 @@ enum ComparisonType { } fn general_array_has_dispatch( - array: &ArrayRef, - sub_array: &ArrayRef, + haystack: &ArrayRef, + needle: &ArrayRef, comparison_type: ComparisonType, ) -> Result { let 
array = if comparison_type == ComparisonType::Single { - let arr = as_generic_list_array::(array)?; - check_datatypes("array_has", &[arr.values(), sub_array])?; + let arr = as_generic_list_array::(haystack)?; + check_datatypes("array_has", &[arr.values(), needle])?; arr } else { - check_datatypes("array_has", &[array, sub_array])?; - as_generic_list_array::(array)? + check_datatypes("array_has", &[haystack, needle])?; + as_generic_list_array::(haystack)? }; let mut boolean_builder = BooleanArray::builder(array.len()); let converter = RowConverter::new(vec![SortField::new(array.value_type())])?; - let element = Arc::clone(sub_array); + let element = Arc::clone(needle); let sub_array = if comparison_type != ComparisonType::Single { - as_generic_list_array::(sub_array)? + as_generic_list_array::(needle)? } else { array }; diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index f980362105a1e..4cd8faa3ca98c 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -17,7 +17,7 @@ //! SQL planning extensions like [`NestedFunctionPlanner`] and [`FieldAccessPlanner`] -use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result}; +use datafusion_common::{plan_err, utils::list_ndims, DFSchema, Result}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr}, @@ -28,7 +28,7 @@ use datafusion_functions_aggregate::nth_value::nth_value_udaf; use crate::map::map_udf; use crate::{ - array_has::array_has_all, + array_has::{array_has_all, array_has_udf}, expr_fn::{array_append, array_concat, array_prepend}, extract::{array_element, array_slice}, make_array::make_array, @@ -102,7 +102,7 @@ impl ExprPlanner for NestedFunctionPlanner { fn plan_make_map(&self, args: Vec) -> Result>> { if args.len() % 2 != 0 { - return exec_err!("make_map requires an even number of arguments"); + return plan_err!("make_map requires an even number of arguments"); } let (keys, values): (Vec<_>, Vec<_>) = @@ -114,6 +114,20 @@ impl ExprPlanner for NestedFunctionPlanner { ScalarFunction::new_udf(map_udf(), vec![keys, values]), ))) } + + fn plan_any(&self, expr: RawBinaryExpr) -> Result> { + if expr.op == sqlparser::ast::BinaryOperator::Eq { + Ok(PlannerResult::Planned(Expr::ScalarFunction( + ScalarFunction::new_udf( + array_has_udf(), + // left and right are reversed here so `needle=any(haystack)` -> `array_has(haystack, needle)` + vec![expr.right, expr.left], + ), + ))) + } else { + plan_err!("Unsupported AnyOp: '{}', only '=' is supported", expr.op) + } + } } pub struct FieldAccessPlanner; diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index b80ffb6aed3ff..edb0002842a8f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,12 +17,12 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use datafusion_expr::planner::PlannerResult; -use datafusion_expr::planner::RawDictionaryExpr; -use datafusion_expr::planner::RawFieldAccessExpr; +use datafusion_expr::planner::{ + PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr, +}; use sqlparser::ast::{ - CastKind, DictionaryField, Expr as SQLExpr, MapEntry, StructField, Subscript, - TrimWhereField, Value, + BinaryOperator, CastKind, DictionaryField, Expr as SQLExpr, MapEntry, StructField, + Subscript, TrimWhereField, Value, }; use datafusion_common::{ @@ -104,13 +104,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { 
fn build_logical_expr( &self, - op: sqlparser::ast::BinaryOperator, + op: BinaryOperator, left: Expr, right: Expr, schema: &DFSchema, ) -> Result { // try extension planers - let mut binary_expr = datafusion_expr::planner::RawBinaryExpr { op, left, right }; + let mut binary_expr = RawBinaryExpr { op, left, right }; for planner in self.context_provider.get_expr_planners() { match planner.plan_binary_op(binary_expr, schema)? { PlannerResult::Planned(expr) => { @@ -122,7 +122,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - let datafusion_expr::planner::RawBinaryExpr { op, left, right } = binary_expr; + let RawBinaryExpr { op, left, right } = binary_expr; Ok(Expr::BinaryExpr(BinaryExpr::new( Box::new(left), self.parse_sql_binary_op(op)?, @@ -631,6 +631,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Map(map) => { self.try_plan_map_literal(map.entries, schema, planner_context) } + SQLExpr::AnyOp { + left, + compare_op, + right, + } => { + let mut binary_expr = RawBinaryExpr { + op: compare_op, + left: self.sql_expr_to_logical_expr( + *left, + schema, + planner_context, + )?, + right: self.sql_expr_to_logical_expr( + *right, + schema, + planner_context, + )?, + }; + for planner in self.context_provider.get_expr_planners() { + match planner.plan_any(binary_expr)? { + PlannerResult::Planned(expr) => { + return Ok(expr); + } + PlannerResult::Original(expr) => { + binary_expr = expr; + } + } + } + not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") + } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index f2972e4c14c26..b71bc765ba376 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5351,6 +5351,25 @@ true false true false false false true true false false true false true #---- #true false true false false false true true false false true false true +# any operator +query ? +select column3 from arrays where 'L'=any(column3); +---- +[L, o, r, e, m] + +query I +select count(*) from arrays where 'L'=any(column3); +---- +1 + +query I +select count(*) from arrays where 'X'=any(column3); +---- +0 + +query error DataFusion error: Error during planning: Unsupported AnyOp: '>', only '=' is supported +select count(*) from arrays where 'X'>any(column3); + ## array_distinct #TODO: https://github.com/apache/datafusion/issues/7142 From 1f35b03426b00e176518b6791cd73a181473dc1a Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 8 Aug 2024 17:23:48 +0200 Subject: [PATCH 246/357] Sync rust docs params for CI and dev (#11890) Since a4ac0829ecf63b3640315835b1374211dfadd953 commit there was a discrepancy between rust.yml GitHub workflow and the `dev/rust_lint.sh` script behavior. Sync the behaviors. Reuse common script to prevent future discrepancies. 
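As an illustrative aside, a minimal sketch (a hypothetical crate item, not code
from this repository) of the kind of rustdoc issue that the shared
`RUSTDOCFLAGS="-D warnings"` setting turns from a warning into a hard
`cargo doc` failure:

```rust
// lib.rs of a hypothetical crate, not DataFusion code.

/// Merges two sorted batches. See also [`merge_streams`] for the async variant.
///
/// `merge_streams` does not exist, so rustdoc reports an unresolved intra-doc
/// link (the warn-by-default `rustdoc::broken_intra_doc_links` lint). With
/// `RUSTDOCFLAGS="-D warnings"` that warning becomes an error, which both the
/// CI workflow and `dev/rust_lint.sh` now enforce consistently.
pub fn merge_sorted() {}
```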
--- .github/workflows/rust.yml | 6 +----- ci/scripts/rust_docs.sh | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 00d31765e77de..809f3acd83749 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -233,11 +233,7 @@ jobs: with: rust-version: stable - name: Run cargo doc - run: | - export RUSTDOCFLAGS="-D warnings" - cargo doc --document-private-items --no-deps --workspace - cd datafusion-cli - cargo doc --document-private-items --no-deps + run: ci/scripts/rust_docs.sh linux-wasm-pack: name: build with wasm-pack diff --git a/ci/scripts/rust_docs.sh b/ci/scripts/rust_docs.sh index cf83b80b5132e..5c93711b6fb6b 100755 --- a/ci/scripts/rust_docs.sh +++ b/ci/scripts/rust_docs.sh @@ -18,7 +18,7 @@ # under the License. set -ex -export RUSTDOCFLAGS="-D warnings -A rustdoc::private-intra-doc-links" +export RUSTDOCFLAGS="-D warnings" cargo doc --document-private-items --no-deps --workspace cd datafusion-cli cargo doc --document-private-items --no-deps From 0ce6d1687e3fccc4dd50c5759919f2a9baecb549 Mon Sep 17 00:00:00 2001 From: Dmitry Bugakov Date: Thu, 8 Aug 2024 18:10:30 +0200 Subject: [PATCH 247/357] Update ASCII scalar function to support Utf8View #11834 (#11884) --- datafusion/functions/src/string/ascii.rs | 118 +++++++++++++----- .../sqllogictest/test_files/string_view.slt | 99 +++++++++++++++ 2 files changed, 189 insertions(+), 28 deletions(-) diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 9e1e6b81b61df..68ba3f5ff15f5 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -16,33 +16,15 @@ // under the License. use crate::utils::make_scalar_function; -use arrow::array::Int32Array; -use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array}; use arrow::datatypes::DataType; -use datafusion_common::{cast::as_generic_string_array, internal_err, Result}; +use arrow::error::ArrowError; +use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; -/// Returns the numeric code of the first character of the argument. 
-/// ascii('x') = 120 -pub fn ascii(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - - let result = string_array - .iter() - .map(|string| { - string.map(|string: &str| { - let mut chars = string.chars(); - chars.next().map_or(0, |v| v as i32) - }) - }) - .collect::(); - - Ok(Arc::new(result) as ArrayRef) -} - #[derive(Debug)] pub struct AsciiFunc { signature: Signature, @@ -60,7 +42,7 @@ impl AsciiFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } @@ -87,12 +69,92 @@ impl ScalarUDFImpl for AsciiFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(ascii::, vec![])(args), - DataType::LargeUtf8 => { - return make_scalar_function(ascii::, vec![])(args); - } - _ => internal_err!("Unsupported data type"), + make_scalar_function(ascii, vec![])(args) + } +} + +fn calculate_ascii<'a, V>(array: V) -> Result +where + V: ArrayAccessor, +{ + let iter = ArrayIter::new(array); + let result = iter + .map(|string| { + string.map(|s| { + let mut chars = s.chars(); + chars.next().map_or(0, |v| v as i32) + }) + }) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} + +/// Returns the numeric code of the first character of the argument. +pub fn ascii(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + Ok(calculate_ascii(string_array)?) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + Ok(calculate_ascii(string_array)?) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + Ok(calculate_ascii(string_array)?) } + _ => internal_err!("Unsupported data type"), + } +} + +#[cfg(test)] +mod tests { + use crate::string::ascii::AsciiFunc; + use crate::utils::test::test_function; + use arrow::array::{Array, Int32Array}; + use arrow::datatypes::DataType::Int32; + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + macro_rules! 
test_ascii { + ($INPUT:expr, $EXPECTED:expr) => { + test_function!( + AsciiFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + + test_function!( + AsciiFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + + test_function!( + AsciiFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + }; + } + + #[test] + fn test_functions() -> Result<()> { + test_ascii!(Some(String::from("x")), Ok(Some(120))); + test_ascii!(Some(String::from("a")), Ok(Some(97))); + test_ascii!(Some(String::from("")), Ok(Some(0))); + test_ascii!(None, Ok(None)); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 4d3f72b1e8d4e..fc10a34256c52 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -500,3 +500,102 @@ select column2|| ' ' ||column3 from temp; ---- rust fast datafusion cool + +### ASCII +# Setup the initial test data +statement ok +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R'); + +# Table with the different combination of column types +statement ok +create table test as +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'LargeUtf8') as column1_large_utf8, + arrow_cast(column2, 'LargeUtf8') as column2_large_utf8, + arrow_cast(column1, 'Utf8View') as column1_utf8view, + arrow_cast(column2, 'Utf8View') as column2_utf8view +FROM test_source; + +# Test ASCII with utf8view against utf8view, utf8, and largeutf8 +# (should be no casts) +query TT +EXPLAIN SELECT + ASCII(column1_utf8view) as c1, + ASCII(column2_utf8) as c2, + ASCII(column2_large_utf8) as c3 +FROM test; +---- +logical_plan +01)Projection: ascii(test.column1_utf8view) AS c1, ascii(test.column2_utf8) AS c2, ascii(test.column2_large_utf8) AS c3 +02)--TableScan: test projection=[column2_utf8, column2_large_utf8, column1_utf8view] + +query III +SELECT + ASCII(column1_utf8view) as c1, + ASCII(column2_utf8) as c2, + ASCII(column2_large_utf8) as c3 +FROM test; +---- +65 88 88 +88 88 88 +82 82 82 +NULL 82 82 + +query TT +EXPLAIN SELECT + ASCII(column1_utf8) as c1, + ASCII(column1_large_utf8) as c2, + ASCII(column2_utf8view) as c3, + ASCII('hello') as c4, + ASCII(arrow_cast('world', 'Utf8View')) as c5 +FROM test; +---- +logical_plan +01)Projection: ascii(test.column1_utf8) AS c1, ascii(test.column1_large_utf8) AS c2, ascii(test.column2_utf8view) AS c3, Int32(104) AS c4, Int32(119) AS c5 +02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column2_utf8view] + +query IIIII +SELECT + ASCII(column1_utf8) as c1, + ASCII(column1_large_utf8) as c2, + ASCII(column2_utf8view) as c3, + ASCII('hello') as c4, + ASCII(arrow_cast('world', 'Utf8View')) as c5 +FROM test; +---- +65 65 88 104 119 +88 88 88 104 119 +82 82 82 104 119 +NULL NULL 82 104 119 + +# Test ASCII with literals cast to Utf8View +query TT +EXPLAIN SELECT + ASCII(arrow_cast('äöüß', 'Utf8View')) as c1, + ASCII(arrow_cast('', 'Utf8View')) as c2, + ASCII(arrow_cast(NULL, 'Utf8View')) as c3 +FROM test; +---- +logical_plan +01)Projection: Int32(228) AS c1, Int32(0) AS c2, Int32(NULL) AS c3 +02)--TableScan: test projection=[] + +query III +SELECT + ASCII(arrow_cast('äöüß', 'Utf8View')) as c1, + ASCII(arrow_cast('', 
'Utf8View')) as c2, + ASCII(arrow_cast(NULL, 'Utf8View')) as c3 +---- +228 0 NULL + +statement ok +drop table test; + +statement ok +drop table test_source; From b9bf6c9ec4698aa7a383a936328ef7df22e8fbb3 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 8 Aug 2024 17:51:37 +0100 Subject: [PATCH 248/357] Fix `Duration` vs `Interval` comparisons and `Interval` as LHS (#11876) * fix duration vs. interval and interval as LHS * add more operators to "interval vs. duration comparison" slt tests --- datafusion/expr/src/type_coercion/binary.rs | 4 +- datafusion/sql/src/expr/value.rs | 6 ++ datafusion/sql/tests/cases/plan_to_sql.rs | 31 ++++++++++ .../sqllogictest/test_files/timestamps.slt | 62 +++++++++++++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 8da33081d6525..6de0118f6bae7 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -1130,7 +1130,9 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(Interval(MonthDayNano)), + (Interval(_) | Duration(_), Interval(_) | Duration(_)) => { + Some(Interval(MonthDayNano)) + } (Date64, Date32) | (Date32, Date64) => Some(Date64), (Timestamp(_, None), Date64) | (Date64, Timestamp(_, None)) => { Some(Timestamp(Nanosecond, None)) diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 1564f06fe4b9a..afcd182fa3435 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -227,6 +227,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let df_op = match op { BinaryOperator::Plus => Operator::Plus, BinaryOperator::Minus => Operator::Minus, + BinaryOperator::Eq => Operator::Eq, + BinaryOperator::NotEq => Operator::NotEq, + BinaryOperator::Gt => Operator::Gt, + BinaryOperator::GtEq => Operator::GtEq, + BinaryOperator::Lt => Operator::Lt, + BinaryOperator::LtEq => Operator::LtEq, _ => { return not_impl_err!("Unsupported interval operator: {op:?}"); } diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 8f9f1dd78f93c..2ac3034873363 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -549,3 +549,34 @@ fn test_pretty_roundtrip() -> Result<()> { Ok(()) } + +fn sql_round_trip(query: &str, expect: &str) { + let statement = Parser::new(&GenericDialect {}) + .try_with_sql(query) + .unwrap() + .parse_statement() + .unwrap(); + + let context = MockContextProvider::default(); + let sql_to_rel = SqlToRel::new(&context); + let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); + + let roundtrip_statement = plan_to_sql(&plan).unwrap(); + assert_eq!(roundtrip_statement.to_string(), expect); +} + +#[test] +fn test_interval_lhs_eq() { + sql_round_trip( + "select interval '2 seconds' = interval '2 seconds'", + "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' = INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + ); +} + +#[test] +fn test_interval_lhs_lt() { + sql_round_trip( + "select interval '2 seconds' < interval '2 seconds'", + "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' < INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + ); +} diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index b63aad49d152e..fb0fd8397f2df 100644 --- 
a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3109,6 +3109,68 @@ SELECT * FROM VALUES 2024-02-01T08:00:00Z 2023-12-31T23:00:00Z +# interval vs. duration comparison +query B +select (now() - now()) < interval '1 seconds'; +---- +true + +query B +select (now() - now()) <= interval '1 seconds'; +---- +true + +query B +select (now() - now()) = interval '0 seconds'; +---- +true + +query B +select (now() - now()) != interval '1 seconds'; +---- +true + +query B +select (now() - now()) > interval '-1 seconds'; +---- +true + +query B +select (now() - now()) >= interval '-1 seconds'; +---- +true + +query B +select arrow_cast(123, 'Duration(Nanosecond)') < interval '200 nanoseconds'; +---- +true + +query B +select arrow_cast(123, 'Duration(Nanosecond)') < interval '100 nanoseconds'; +---- +false + +query B +select arrow_cast(123, 'Duration(Nanosecond)') < interval '1 seconds'; +---- +true + +query B +select interval '1 seconds' < arrow_cast(123, 'Duration(Nanosecond)') +---- +false + +# interval as LHS +query B +select interval '2 seconds' = interval '2 seconds'; +---- +true + +query B +select interval '1 seconds' < interval '2 seconds'; +---- +true + statement ok drop table t; From 786f353ca1859d980f2e79a9c13bd1dc1a850111 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 8 Aug 2024 12:13:33 -0600 Subject: [PATCH 249/357] chore: Prepare 41.0.0-rc1 (#11889) * Prepare 41.0.0-rc1 * changelog * changelog * regenerate example parquet file --- Cargo.toml | 40 ++-- datafusion-cli/Cargo.lock | 62 ++--- datafusion-cli/Cargo.toml | 4 +- datafusion/core/example.parquet | Bin 976 -> 976 bytes dev/changelog/41.0.0.md | 363 ++++++++++++++++++++++++++++++ dev/release/README.md | 23 +- docs/source/user-guide/configs.md | 2 +- 7 files changed, 434 insertions(+), 60 deletions(-) create mode 100644 dev/changelog/41.0.0.md diff --git a/Cargo.toml b/Cargo.toml index e1bd0d7aa72b2..3431c4673e0cf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.76" -version = "40.0.0" +version = "41.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -88,25 +88,25 @@ bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "40.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "40.0.0" } -datafusion-common = { path = "datafusion/common", version = "40.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "40.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "40.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "40.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "40.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false } 
-datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "40.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "40.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "40.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "40.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "40.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "40.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "40.0.0" } +datafusion = { path = "datafusion/core", version = "41.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "41.0.0" } +datafusion-common = { path = "datafusion/common", version = "41.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "41.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "41.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "41.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "41.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "41.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "41.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "41.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "41.0.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "41.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "41.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9d20c242bbef9..2eb93da7c020e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -874,9 +874,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" dependencies = [ "jobserver", "libc", @@ -1128,7 +1128,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "apache-avro", @@ -1184,7 +1184,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow-schema", "async-trait", @@ -1196,7 +1196,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1223,7 +1223,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "apache-avro", @@ -1244,14 +1244,14 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" 
-version = "40.0.0" +version = "41.0.0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "chrono", @@ -1270,7 +1270,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1287,7 +1287,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -1312,7 +1312,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1328,7 +1328,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-array", @@ -1348,7 +1348,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "async-trait", @@ -1366,7 +1366,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1394,7 +1394,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1406,7 +1406,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "40.0.0" +version = "41.0.0" dependencies = [ "datafusion-common", "datafusion-execution", @@ -1416,7 +1416,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1448,7 +1448,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-array", @@ -2049,9 +2049,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" dependencies = [ "bytes", "futures-channel", @@ -2524,9 +2524,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.2" +version = "0.36.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" +checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" dependencies = [ "memchr", ] @@ -3220,9 +3220,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" @@ -3341,18 +3341,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", @@ -3590,15 +3590,15 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" -version = "3.11.0" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand 2.1.0", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index ff6673dc97273..cbd9ffd0febab 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "40.0.0" +version = "41.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -35,7 +35,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "40.0.0", features = [ +datafusion = { path = "../datafusion/core", version = "41.0.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/datafusion/core/example.parquet b/datafusion/core/example.parquet index 94de10394b33d26a23a9888e88faa1fa90f14043..17f7473cd221426b545a5f437c42efdc6b1702b3 100644 GIT binary patch delta 33 kcmcb>et~_%c4kRKJp(-hNgEjk1_)xvVq{et~_%c4kQfJs^~{kzrtfAcibP28IB~AVUC$Sq4S` diff --git a/dev/changelog/41.0.0.md b/dev/changelog/41.0.0.md new file mode 100644 index 0000000000000..3e289112c7bbd --- /dev/null +++ b/dev/changelog/41.0.0.md @@ -0,0 +1,363 @@ + + +# Apache DataFusion 41.0.0 Changelog + +This release consists of 245 commits from 69 contributors. See credits at the end of this changelog for more information. 
+ +**Breaking changes:** + +- make unparser `Dialect` trait `Send` + `Sync` [#11504](https://github.com/apache/datafusion/pull/11504) (y-f-u) +- Implement physical plan serialization for csv COPY plans , add `as_any`, `Debug` to `FileFormatFactory` [#11588](https://github.com/apache/datafusion/pull/11588) (Lordworms) +- Consistent API to set parameters of aggregate and window functions (`AggregateExt` --> `ExprFunctionExt`) [#11550](https://github.com/apache/datafusion/pull/11550) (timsaucer) +- Rename `ColumnOptions` to `ParquetColumnOptions` [#11512](https://github.com/apache/datafusion/pull/11512) (alamb) +- Rename `input_type` --> `input_types` on AggregateFunctionExpr / AccumulatorArgs / StateFieldsArgs [#11666](https://github.com/apache/datafusion/pull/11666) (lewiszlw) +- Rename RepartitionExec metric `repart_time` to `repartition_time` [#11703](https://github.com/apache/datafusion/pull/11703) (alamb) +- Remove `AggregateFunctionDefinition` [#11803](https://github.com/apache/datafusion/pull/11803) (lewiszlw) +- Skipping partial aggregation when it is not helping for high cardinality aggregates [#11627](https://github.com/apache/datafusion/pull/11627) (korowa) +- Optionally create name of aggregate expression from expressions [#11776](https://github.com/apache/datafusion/pull/11776) (lewiszlw) + +**Performance related:** + +- feat: Optimize CASE expression for "column or null" use case [#11534](https://github.com/apache/datafusion/pull/11534) (andygrove) +- feat: Optimize CASE expression for usage where then and else values are literals [#11553](https://github.com/apache/datafusion/pull/11553) (andygrove) +- perf: Optimize IsNotNullExpr [#11586](https://github.com/apache/datafusion/pull/11586) (andygrove) + +**Implemented enhancements:** + +- feat: Add `fail_on_overflow` option to `BinaryExpr` [#11400](https://github.com/apache/datafusion/pull/11400) (andygrove) +- feat: add UDF to_local_time() [#11347](https://github.com/apache/datafusion/pull/11347) (appletreeisyellow) +- feat: switch to using proper Substrait types for IntervalYearMonth and IntervalDayTime [#11471](https://github.com/apache/datafusion/pull/11471) (Blizzara) +- feat: support UDWFs in Substrait [#11489](https://github.com/apache/datafusion/pull/11489) (Blizzara) +- feat: support `unnest` in GROUP BY clause [#11469](https://github.com/apache/datafusion/pull/11469) (JasonLi-cn) +- feat: support `COUNT()` [#11229](https://github.com/apache/datafusion/pull/11229) (tshauck) +- feat: consume and produce Substrait type extensions [#11510](https://github.com/apache/datafusion/pull/11510) (Blizzara) +- feat: Error when a SHOW command is passed in with an accompanying non-existant variable [#11540](https://github.com/apache/datafusion/pull/11540) (itsjunetime) +- feat: support Map literals in Substrait consumer and producer [#11547](https://github.com/apache/datafusion/pull/11547) (Blizzara) +- feat: add bounds for unary math scalar functions [#11584](https://github.com/apache/datafusion/pull/11584) (tshauck) +- feat: Add support for cardinality function on maps [#11801](https://github.com/apache/datafusion/pull/11801) (Weijun-H) +- feat: support `Utf8View` type in `starts_with` function [#11787](https://github.com/apache/datafusion/pull/11787) (tshauck) +- feat: Expose public method for optimizing physical plans 
[#11879](https://github.com/apache/datafusion/pull/11879) (andygrove) + +**Fixed bugs:** + +- fix: Fix eq properties regression from #10434 [#11363](https://github.com/apache/datafusion/pull/11363) (suremarc) +- fix: make sure JOIN ON expression is boolean type [#11423](https://github.com/apache/datafusion/pull/11423) (jonahgao) +- fix: `regexp_replace` fails when pattern or replacement is a scalar `NULL` [#11459](https://github.com/apache/datafusion/pull/11459) (Weijun-H) +- fix: unparser generates wrong sql for derived table with columns [#11505](https://github.com/apache/datafusion/pull/11505) (y-f-u) +- fix: make `UnKnownColumn`s not equal to others physical exprs [#11536](https://github.com/apache/datafusion/pull/11536) (jonahgao) +- fix: fixes trig function order by [#11559](https://github.com/apache/datafusion/pull/11559) (tshauck) +- fix: CASE with NULL [#11542](https://github.com/apache/datafusion/pull/11542) (Weijun-H) +- fix: panic and incorrect results in `LogFunc::output_ordering()` [#11571](https://github.com/apache/datafusion/pull/11571) (jonahgao) +- fix: expose the fluent API fn for approx_distinct instead of the module [#11644](https://github.com/apache/datafusion/pull/11644) (Michael-J-Ward) +- fix: dont try to coerce list for regex match [#11646](https://github.com/apache/datafusion/pull/11646) (tshauck) +- fix: regr_count now returns Uint64 [#11731](https://github.com/apache/datafusion/pull/11731) (Michael-J-Ward) +- fix: set `null_equals_null` to false when `convert_cross_join_to_inner_join` [#11738](https://github.com/apache/datafusion/pull/11738) (jonahgao) +- fix: Add additional required expression for natural join [#11713](https://github.com/apache/datafusion/pull/11713) (Lordworms) +- fix: hash join tests with forced collisions [#11806](https://github.com/apache/datafusion/pull/11806) (korowa) +- fix: `collect_columns` quadratic complexity [#11843](https://github.com/apache/datafusion/pull/11843) (crepererum) + +**Documentation updates:** + +- Minor: Add link to blog to main DataFusion website [#11356](https://github.com/apache/datafusion/pull/11356) (alamb) +- Add `to_local_time()` in function reference docs [#11401](https://github.com/apache/datafusion/pull/11401) (appletreeisyellow) +- Minor: Consolidate specification doc sections [#11427](https://github.com/apache/datafusion/pull/11427) (alamb) +- Combine the Roadmap / Quarterly Roadmap sections [#11426](https://github.com/apache/datafusion/pull/11426) (alamb) +- Minor: Add an example for backtrace pretty print [#11450](https://github.com/apache/datafusion/pull/11450) (goldmedal) +- Docs: Document creating new extension APIs [#11425](https://github.com/apache/datafusion/pull/11425) (alamb) +- Minor: Clarify which parquet options are used for reading/writing [#11511](https://github.com/apache/datafusion/pull/11511) (alamb) +- Support `newlines_in_values` CSV option [#11533](https://github.com/apache/datafusion/pull/11533) (connec) +- chore: Minor cleanup `simplify_demo()` example [#11576](https://github.com/apache/datafusion/pull/11576) (kavirajk) +- Move Datafusion Query Optimizer to library user guide [#11563](https://github.com/apache/datafusion/pull/11563) (devesh-2002) +- Fix typo in doc of Partitioning [#11612](https://github.com/apache/datafusion/pull/11612) (waruto210) +- Doc: A tiny typo 
in scalar function's doc [#11620](https://github.com/apache/datafusion/pull/11620) (2010YOUY01) +- Change default Parquet writer settings to match arrow-rs (except for compression & statistics) [#11558](https://github.com/apache/datafusion/pull/11558) (wiedld) +- Rename `functions-array` to `functions-nested` [#11602](https://github.com/apache/datafusion/pull/11602) (goldmedal) +- Add parser option enable_options_value_normalization [#11330](https://github.com/apache/datafusion/pull/11330) (xinlifoobar) +- Add reference to #comet channel in Arrow Rust Discord server [#11637](https://github.com/apache/datafusion/pull/11637) (ajmarcus) +- Extract catalog API to separate crate, change `TableProvider::scan` to take a trait rather than `SessionState` [#11516](https://github.com/apache/datafusion/pull/11516) (findepi) +- doc: why nullable of list item is set to true [#11626](https://github.com/apache/datafusion/pull/11626) (jcsherin) +- Docs: adding explicit mention of test_utils to docs [#11670](https://github.com/apache/datafusion/pull/11670) (edmondop) +- Ensure statistic defaults in parquet writers are in sync [#11656](https://github.com/apache/datafusion/pull/11656) (wiedld) +- Merge `string-view2` branch: reading from parquet up to 2x faster for some ClickBench queries (not on by default) [#11667](https://github.com/apache/datafusion/pull/11667) (alamb) +- Doc: Add Sail to known users list [#11791](https://github.com/apache/datafusion/pull/11791) (shehabgamin) +- Move min and max to user defined aggregate function, remove `AggregateFunction` / `AggregateFunctionDefinition::BuiltIn` [#11013](https://github.com/apache/datafusion/pull/11013) (edmondop) +- Change name of MAX/MIN udaf to lowercase max/min [#11795](https://github.com/apache/datafusion/pull/11795) (edmondop) +- doc: Add support for `map` and `make_map` functions [#11799](https://github.com/apache/datafusion/pull/11799) (Weijun-H) +- Improve readme page in crates.io [#11809](https://github.com/apache/datafusion/pull/11809) (lewiszlw) +- refactor: remove unneed mut for session context [#11864](https://github.com/apache/datafusion/pull/11864) (sunng87) + +**Other:** + +- Prepare 40.0.0 Release [#11343](https://github.com/apache/datafusion/pull/11343) (andygrove) +- Support `NULL` literals in where clause [#11266](https://github.com/apache/datafusion/pull/11266) (xinlifoobar) +- Implement TPCH substrait integration test, support tpch_6, tpch_10, t… [#11349](https://github.com/apache/datafusion/pull/11349) (Lordworms) +- Fix bug when pushing projection under joins [#11333](https://github.com/apache/datafusion/pull/11333) (jonahgao) +- Minor: some cosmetics in `filter.rs`, fix clippy due to logical conflict [#11368](https://github.com/apache/datafusion/pull/11368) (comphead) +- Update prost-derive requirement from 0.12 to 0.13 [#11355](https://github.com/apache/datafusion/pull/11355) (dependabot[bot]) +- Minor: update dashmap `6.0.1` [#11335](https://github.com/apache/datafusion/pull/11335) (alamb) +- Improve and test dataframe API examples in docs [#11290](https://github.com/apache/datafusion/pull/11290) (alamb) +- Remove redundant `unalias_nested` calls for creating Filter's [#11340](https://github.com/apache/datafusion/pull/11340) (alamb) +- Enable `clone_on_ref_ptr` clippy lint on optimizer 
[#11346](https://github.com/apache/datafusion/pull/11346) (lewiszlw) +- Update termtree requirement from 0.4.1 to 0.5.0 [#11383](https://github.com/apache/datafusion/pull/11383) (dependabot[bot]) +- Introduce `resources_err!` error macro [#11374](https://github.com/apache/datafusion/pull/11374) (comphead) +- Enable `clone_on_ref_ptr` clippy lint on common [#11384](https://github.com/apache/datafusion/pull/11384) (lewiszlw) +- Track parquet writer encoding memory usage on MemoryPool [#11345](https://github.com/apache/datafusion/pull/11345) (wiedld) +- Minor: remove clones and unnecessary Arcs in `from_substrait_rex` [#11337](https://github.com/apache/datafusion/pull/11337) (alamb) +- Minor: Change no-statement error message to be clearer [#11394](https://github.com/apache/datafusion/pull/11394) (itsjunetime) +- Change `array_agg` to return `null` on no input rather than empty list [#11299](https://github.com/apache/datafusion/pull/11299) (jayzhan211) +- Minor: return "not supported" for `COUNT DISTINCT` with multiple arguments [#11391](https://github.com/apache/datafusion/pull/11391) (jonahgao) +- Enable `clone_on_ref_ptr` clippy lint on sql [#11380](https://github.com/apache/datafusion/pull/11380) (lewiszlw) +- Move configuration information out of example usage page [#11300](https://github.com/apache/datafusion/pull/11300) (alamb) +- chore: reuse a single function to create the Substrait TPCH consumer test contexts [#11396](https://github.com/apache/datafusion/pull/11396) (Blizzara) +- refactor: change error type for "no statement" [#11411](https://github.com/apache/datafusion/pull/11411) (crepererum) +- Implement prettier SQL unparsing (more human readable) [#11186](https://github.com/apache/datafusion/pull/11186) (MohamedAbdeen21) +- Move `overlay` planning to`ExprPlanner` [#11398](https://github.com/apache/datafusion/pull/11398) (dharanad) +- Coerce types for all union children plans when eliminating nesting [#11386](https://github.com/apache/datafusion/pull/11386) (gruuya) +- Add customizable equality and hash functions to UDFs [#11392](https://github.com/apache/datafusion/pull/11392) (joroKr21) +- Implement ScalarFunction `MAKE_MAP` and `MAP` [#11361](https://github.com/apache/datafusion/pull/11361) (goldmedal) +- Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats [#11357](https://github.com/apache/datafusion/pull/11357) (peter-toth) +- fix(11397): surface proper errors in ParquetSink [#11399](https://github.com/apache/datafusion/pull/11399) (wiedld) +- Minor: Add note about SQLLancer fuzz testing to docs [#11430](https://github.com/apache/datafusion/pull/11430) (alamb) +- Trivial: use arrow csv writer's timestamp_tz_format [#11407](https://github.com/apache/datafusion/pull/11407) (tmi) +- Improved unparser documentation [#11395](https://github.com/apache/datafusion/pull/11395) (alamb) +- Avoid calling shutdown after failed write of AsyncWrite [#11415](https://github.com/apache/datafusion/pull/11415) (joroKr21) +- Short term way to make `AggregateStatistics` still work when min/max is converted to udaf [#11261](https://github.com/apache/datafusion/pull/11261) (Rachelint) +- Implement TPCH substrait integration test, support tpch_13, tpch_14,16 [#11405](https://github.com/apache/datafusion/pull/11405) (Lordworms) +- Minor: fix giuthub action 
labeler rules [#11428](https://github.com/apache/datafusion/pull/11428) (alamb) +- Minor: change internal error to not supported error for nested field … [#11446](https://github.com/apache/datafusion/pull/11446) (alamb) +- Minor: change Datafusion --> DataFusion in docs [#11439](https://github.com/apache/datafusion/pull/11439) (alamb) +- Support serialization/deserialization for custom physical exprs in proto [#11387](https://github.com/apache/datafusion/pull/11387) (lewiszlw) +- remove termtree dependency [#11416](https://github.com/apache/datafusion/pull/11416) (Kev1n8) +- Add SessionStateBuilder and extract out the registration of defaults [#11403](https://github.com/apache/datafusion/pull/11403) (Omega359) +- integrate consumer tests, implement tpch query 18 to 22 [#11462](https://github.com/apache/datafusion/pull/11462) (Lordworms) +- Docs: Explain the usage of logical expressions for `create_aggregate_expr` [#11458](https://github.com/apache/datafusion/pull/11458) (jayzhan211) +- Return scalar result when all inputs are constants in `map` and `make_map` [#11461](https://github.com/apache/datafusion/pull/11461) (Rachelint) +- Enable `clone_on_ref_ptr` clippy lint on functions\* [#11468](https://github.com/apache/datafusion/pull/11468) (lewiszlw) +- minor: non-overlapping `repart_time` and `send_time` metrics [#11440](https://github.com/apache/datafusion/pull/11440) (korowa) +- Minor: rename `row_groups.rs` to `row_group_filter.rs` [#11481](https://github.com/apache/datafusion/pull/11481) (alamb) +- Support alternate formats for unparsing `datetime` to `timestamp` and `interval` [#11466](https://github.com/apache/datafusion/pull/11466) (y-f-u) +- chore: Add criterion benchmark for CaseExpr [#11482](https://github.com/apache/datafusion/pull/11482) (andygrove) +- Initial support for `StringView`, merge changes from `string-view` development branch [#11402](https://github.com/apache/datafusion/pull/11402) (alamb) +- Replace to_lowercase with to_string in sql example [#11486](https://github.com/apache/datafusion/pull/11486) (lewiszlw) +- Minor: Make execute_input_stream Accessible for Any Sinking Operators [#11449](https://github.com/apache/datafusion/pull/11449) (berkaysynnada) +- Enable `clone_on_ref_ptr` clippy lints on proto [#11465](https://github.com/apache/datafusion/pull/11465) (lewiszlw) +- upgrade sqlparser 0.47 -> 0.48 [#11453](https://github.com/apache/datafusion/pull/11453) (MohamedAbdeen21) +- Add extension hooks for encoding and decoding UDAFs and UDWFs [#11417](https://github.com/apache/datafusion/pull/11417) (joroKr21) +- Remove element's nullability of array_agg function [#11447](https://github.com/apache/datafusion/pull/11447) (jayzhan211) +- Get expr planners when creating new planner [#11485](https://github.com/apache/datafusion/pull/11485) (jayzhan211) +- Support alternate format for Utf8 unparsing (CHAR) [#11494](https://github.com/apache/datafusion/pull/11494) (sgrebnov) +- implement retract_batch for xor accumulator [#11500](https://github.com/apache/datafusion/pull/11500) (drewhayward) +- Refactor: more clearly delineate between `TableParquetOptions` and `ParquetWriterOptions` [#11444](https://github.com/apache/datafusion/pull/11444) (wiedld) +- chore: fix typos of common and core packages [#11520](https://github.com/apache/datafusion/pull/11520) (JasonLi-cn) 
+- Move spill related functions to spill.rs [#11509](https://github.com/apache/datafusion/pull/11509) (findepi) +- Add tests that show the different defaults for `ArrowWriter` and `TableParquetOptions` [#11524](https://github.com/apache/datafusion/pull/11524) (wiedld) +- Create `datafusion-physical-optimizer` crate [#11507](https://github.com/apache/datafusion/pull/11507) (lewiszlw) +- Minor: Assert `test_enabled_backtrace` requirements to run [#11525](https://github.com/apache/datafusion/pull/11525) (comphead) +- Move handlign of NULL literals in where clause to type coercion pass [#11491](https://github.com/apache/datafusion/pull/11491) (xinlifoobar) +- Update parquet page pruning code to use the `StatisticsExtractor` [#11483](https://github.com/apache/datafusion/pull/11483) (alamb) +- Enable SortMergeJoin LeftAnti filtered fuzz tests [#11535](https://github.com/apache/datafusion/pull/11535) (comphead) +- chore: fix typos of expr, functions, optimizer, physical-expr-common,… [#11538](https://github.com/apache/datafusion/pull/11538) (JasonLi-cn) +- Minor: Remove clone in `PushDownFilter` [#11532](https://github.com/apache/datafusion/pull/11532) (jayzhan211) +- Minor: avoid a clone in type coercion [#11530](https://github.com/apache/datafusion/pull/11530) (alamb) +- Move array `ArrayAgg` to a `UserDefinedAggregate` [#11448](https://github.com/apache/datafusion/pull/11448) (jayzhan211) +- Move `MAKE_MAP` to ExprPlanner [#11452](https://github.com/apache/datafusion/pull/11452) (goldmedal) +- chore: fix typos of sql, sqllogictest and substrait packages [#11548](https://github.com/apache/datafusion/pull/11548) (JasonLi-cn) +- Prevent bigger files from being checked in [#11508](https://github.com/apache/datafusion/pull/11508) (findepi) +- Add dialect param to use double precision for float64 in Postgres [#11495](https://github.com/apache/datafusion/pull/11495) (Sevenannn) +- Minor: move `SessionStateDefaults` into its own module [#11566](https://github.com/apache/datafusion/pull/11566) (alamb) +- refactor: rewrite mega type to an enum containing both cases [#11539](https://github.com/apache/datafusion/pull/11539) (LorrensP-2158466) +- Move `sql_compound_identifier_to_expr ` to `ExprPlanner` [#11487](https://github.com/apache/datafusion/pull/11487) (dharanad) +- Support SortMergeJoin spilling [#11218](https://github.com/apache/datafusion/pull/11218) (comphead) +- Fix unparser invalid sql for query with order [#11527](https://github.com/apache/datafusion/pull/11527) (y-f-u) +- Provide DataFrame API for `map` and move `map` to `functions-array` [#11560](https://github.com/apache/datafusion/pull/11560) (goldmedal) +- Move OutputRequirements to datafusion-physical-optimizer crate [#11579](https://github.com/apache/datafusion/pull/11579) (xinlifoobar) +- Minor: move `Column` related tests and rename `column.rs` [#11573](https://github.com/apache/datafusion/pull/11573) (jonahgao) +- Fix SortMergeJoin antijoin flaky condition [#11604](https://github.com/apache/datafusion/pull/11604) (comphead) +- Improve Union Equivalence Propagation [#11506](https://github.com/apache/datafusion/pull/11506) (mustafasrepo) +- Migrate `OrderSensitiveArrayAgg` to be a user defined aggregate [#11564](https://github.com/apache/datafusion/pull/11564) (jayzhan211) +- Minor:Disable flaky SMJ antijoin filtered test until 
the fix [#11608](https://github.com/apache/datafusion/pull/11608) (comphead) +- support Decimal256 type in datafusion-proto [#11606](https://github.com/apache/datafusion/pull/11606) (leoyvens) +- Chore/fifo tests cleanup [#11616](https://github.com/apache/datafusion/pull/11616) (ozankabak) +- Fix Internal Error for an INNER JOIN query [#11578](https://github.com/apache/datafusion/pull/11578) (xinlifoobar) +- test: get file size by func metadata [#11575](https://github.com/apache/datafusion/pull/11575) (zhuliquan) +- Improve unparser MySQL compatibility [#11589](https://github.com/apache/datafusion/pull/11589) (sgrebnov) +- Push scalar functions into cross join [#11528](https://github.com/apache/datafusion/pull/11528) (lewiszlw) +- Remove ArrayAgg Builtin in favor of UDF [#11611](https://github.com/apache/datafusion/pull/11611) (jayzhan211) +- refactor: simplify `DFSchema::field_with_unqualified_name` [#11619](https://github.com/apache/datafusion/pull/11619) (jonahgao) +- Minor: Use upstream `concat_batches` from arrow-rs [#11615](https://github.com/apache/datafusion/pull/11615) (alamb) +- Fix : `signum` function bug when `0.0` input [#11580](https://github.com/apache/datafusion/pull/11580) (getChan) +- Enforce uniqueness of `named_struct` field names [#11614](https://github.com/apache/datafusion/pull/11614) (dharanad) +- Minor: unecessary row_count calculation in `CrossJoinExec` and `NestedLoopsJoinExec` [#11632](https://github.com/apache/datafusion/pull/11632) (alamb) +- ExprBuilder for Physical Aggregate Expr [#11617](https://github.com/apache/datafusion/pull/11617) (jayzhan211) +- Minor: avoid copying order by exprs in planner [#11634](https://github.com/apache/datafusion/pull/11634) (alamb) +- Unify CI and pre-commit hook settings for clippy [#11640](https://github.com/apache/datafusion/pull/11640) (findepi) +- Parsing SQL strings to Exprs with the qualified schema [#11562](https://github.com/apache/datafusion/pull/11562) (Lordworms) +- Add some zero column tests covering LIMIT, GROUP BY, WHERE, JOIN, and WINDOW [#11624](https://github.com/apache/datafusion/pull/11624) (Kev1n8) +- Refactor/simplify window frame utils [#11648](https://github.com/apache/datafusion/pull/11648) (ozankabak) +- Minor: use `ready!` macro to simplify `FilterExec` [#11649](https://github.com/apache/datafusion/pull/11649) (alamb) +- Temporarily pin toolchain version to avoid clippy errors [#11655](https://github.com/apache/datafusion/pull/11655) (findepi) +- Fix clippy errors for Rust 1.80 [#11654](https://github.com/apache/datafusion/pull/11654) (findepi) +- Add `CsvExecBuilder` for creating `CsvExec` [#11633](https://github.com/apache/datafusion/pull/11633) (connec) +- chore(deps): update sqlparser requirement from 0.48 to 0.49 [#11630](https://github.com/apache/datafusion/pull/11630) (dependabot[bot]) +- Add support for USING to SQL unparser [#11636](https://github.com/apache/datafusion/pull/11636) (wackywendell) +- Run CI with latest (Rust 1.80), add ticket references to commented out tests [#11661](https://github.com/apache/datafusion/pull/11661) (alamb) +- Use `AccumulatorArgs::is_reversed` in `NthValueAgg` [#11669](https://github.com/apache/datafusion/pull/11669) (jcsherin) +- Implement physical plan serialization for json Copy plans 
[#11645](https://github.com/apache/datafusion/pull/11645) (Lordworms) +- Minor: improve documentation on `SessionState` [#11642](https://github.com/apache/datafusion/pull/11642) (alamb) +- Add LimitPushdown optimization rule and CoalesceBatchesExec fetch [#11652](https://github.com/apache/datafusion/pull/11652) (alihandroid) +- Update to arrow/parquet `52.2.0` [#11691](https://github.com/apache/datafusion/pull/11691) (alamb) +- Minor: Rename `RepartitionMetrics::repartition_time` to `RepartitionMetrics::repart_time` to match metric [#11478](https://github.com/apache/datafusion/pull/11478) (alamb) +- Update cache key used in rust CI script [#11641](https://github.com/apache/datafusion/pull/11641) (findepi) +- Fix bug in `remove_join_expressions` [#11693](https://github.com/apache/datafusion/pull/11693) (jonahgao) +- Initial changes to support using udaf min/max for statistics and opti… [#11696](https://github.com/apache/datafusion/pull/11696) (edmondop) +- Handle nulls in approx_percentile_cont [#11721](https://github.com/apache/datafusion/pull/11721) (Dandandan) +- Reduce repetition in try_process_group_by_unnest and try_process_unnest [#11714](https://github.com/apache/datafusion/pull/11714) (JasonLi-cn) +- Minor: Add example for `ScalarUDF::call` [#11727](https://github.com/apache/datafusion/pull/11727) (alamb) +- Use `cargo release` in `bench.sh` [#11722](https://github.com/apache/datafusion/pull/11722) (alamb) +- expose some fields on session state [#11716](https://github.com/apache/datafusion/pull/11716) (waynexia) +- Make DefaultSchemaAdapterFactory public [#11709](https://github.com/apache/datafusion/pull/11709) (adriangb) +- Check hashes first during probing the aggr hash table [#11718](https://github.com/apache/datafusion/pull/11718) (Rachelint) +- Implement physical plan serialization for parquet Copy plans [#11735](https://github.com/apache/datafusion/pull/11735) (Lordworms) +- Support cross-timezone `timestamp` comparison via coercsion [#11711](https://github.com/apache/datafusion/pull/11711) (jeffreyssmith2nd) +- Minor: Improve documentation for AggregateUDFImpl::state_fields [#11740](https://github.com/apache/datafusion/pull/11740) (lewiszlw) +- Do not push down Sorts if it violates the sort requirements [#11678](https://github.com/apache/datafusion/pull/11678) (alamb) +- Use upstream `StatisticsConverter` from arrow-rs in DataFusion [#11479](https://github.com/apache/datafusion/pull/11479) (alamb) +- Fix `plan_to_sql`: Add wildcard projection to SELECT statement if no projection was set [#11744](https://github.com/apache/datafusion/pull/11744) (LatrecheYasser) +- Use upstream `DataType::from_str` in arrow-cast [#11254](https://github.com/apache/datafusion/pull/11254) (alamb) +- Fix documentation warnings, make CsvExecBuilder and Unparsed pub [#11729](https://github.com/apache/datafusion/pull/11729) (alamb) +- [Minor] Add test for only nulls (empty) as input in APPROX_PERCENTILE_CONT [#11760](https://github.com/apache/datafusion/pull/11760) (Dandandan) +- Add `TrackedMemoryPool` with better error messages on exhaustion [#11665](https://github.com/apache/datafusion/pull/11665) (wiedld) +- Derive `Debug` for logical plan nodes [#11757](https://github.com/apache/datafusion/pull/11757) (lewiszlw) +- Minor: add "clickbench extended" queries to slt tests 
[#11763](https://github.com/apache/datafusion/pull/11763) (alamb) +- Minor: Add comment explaining rationale for hash check [#11750](https://github.com/apache/datafusion/pull/11750) (alamb) +- Fix bug that `COUNT(DISTINCT)` on StringView panics [#11768](https://github.com/apache/datafusion/pull/11768) (XiangpengHao) +- [Minor] Refactor approx_percentile [#11769](https://github.com/apache/datafusion/pull/11769) (Dandandan) +- minor: always time batch_filter even when the result is an empty batch [#11775](https://github.com/apache/datafusion/pull/11775) (andygrove) +- Improve OOM message when a single reservation request fails to get more bytes. [#11771](https://github.com/apache/datafusion/pull/11771) (wiedld) +- [Minor] Short circuit `ApplyFunctionRewrites` if there are no function rewrites [#11765](https://github.com/apache/datafusion/pull/11765) (gruuya) +- Fix #11692: Improve doc comments within macros [#11694](https://github.com/apache/datafusion/pull/11694) (Rafferty97) +- Extract `CoalesceBatchesStream` to a struct [#11610](https://github.com/apache/datafusion/pull/11610) (alamb) +- refactor: move ExecutionPlan and related structs into dedicated mod [#11759](https://github.com/apache/datafusion/pull/11759) (waynexia) +- Minor: Add references to github issue in comments [#11784](https://github.com/apache/datafusion/pull/11784) (findepi) +- Add docs and rename param for `Signature::numeric` [#11778](https://github.com/apache/datafusion/pull/11778) (matthewmturner) +- Support planning `Map` literal [#11780](https://github.com/apache/datafusion/pull/11780) (goldmedal) +- Support `LogicalPlan` `Debug` differently than `Display` [#11774](https://github.com/apache/datafusion/pull/11774) (lewiszlw) +- Remove redundant Aggregate when `DISTINCT` & `GROUP BY` are in the same query [#11781](https://github.com/apache/datafusion/pull/11781) (mertak-synnada) +- Minor: add ticket reference and fmt [#11805](https://github.com/apache/datafusion/pull/11805) (alamb) +- Improve MSRV CI check to print out problems to log [#11789](https://github.com/apache/datafusion/pull/11789) (alamb) +- Improve log func tests stability [#11808](https://github.com/apache/datafusion/pull/11808) (lewiszlw) +- Add valid Distinct case for aggregation [#11814](https://github.com/apache/datafusion/pull/11814) (mertak-synnada) +- Don't implement `create_sliding_accumulator` repeatedly [#11813](https://github.com/apache/datafusion/pull/11813) (lewiszlw) +- chore(deps): update rstest requirement from 0.21.0 to 0.22.0 [#11811](https://github.com/apache/datafusion/pull/11811) (dependabot[bot]) +- Minor: Update exected output due to logical conflict [#11824](https://github.com/apache/datafusion/pull/11824) (alamb) +- Pass scalar to `eq` inside `nullif` [#11697](https://github.com/apache/datafusion/pull/11697) (simonvandel) +- refactor: move `aggregate_statistics` to `datafusion-physical-optimizer` [#11798](https://github.com/apache/datafusion/pull/11798) (Weijun-H) +- Minor: refactor probe check into function `should_skip_aggregation` [#11821](https://github.com/apache/datafusion/pull/11821) (alamb) +- Minor: consolidate `path_partition` test into `core_integration` [#11831](https://github.com/apache/datafusion/pull/11831) (alamb) +- Move optimizer integration tests to `core_integration` 
[#11830](https://github.com/apache/datafusion/pull/11830) (alamb) +- Bump deprecated version of SessionState::new_with_config_rt to 41.0.0 [#11839](https://github.com/apache/datafusion/pull/11839) (kezhuw) +- Fix partial aggregation skipping with Decimal aggregators [#11833](https://github.com/apache/datafusion/pull/11833) (alamb) +- Fix bug with zero-sized buffer for StringViewArray [#11841](https://github.com/apache/datafusion/pull/11841) (XiangpengHao) +- Reduce clone of `Statistics` in `ListingTable` and `PartitionedFile` [#11802](https://github.com/apache/datafusion/pull/11802) (Rachelint) +- Add `LogicalPlan::CreateIndex` [#11817](https://github.com/apache/datafusion/pull/11817) (lewiszlw) +- Update `object_store` to 0.10.2 [#11860](https://github.com/apache/datafusion/pull/11860) (danlgrca) +- Add `skipped_aggregation_rows` metric to aggregate operator [#11706](https://github.com/apache/datafusion/pull/11706) (alamb) +- Cast `Utf8View` to `Utf8` to support `||` from `StringViewArray` [#11796](https://github.com/apache/datafusion/pull/11796) (dharanad) +- Improve nested loop join code [#11863](https://github.com/apache/datafusion/pull/11863) (lewiszlw) +- [Minor]: Refactor to use Result.transpose() [#11882](https://github.com/apache/datafusion/pull/11882) (djanderson) +- support `ANY()` op [#11849](https://github.com/apache/datafusion/pull/11849) (samuelcolvin) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 48 Andrew Lamb + 20 张林伟 + 9 Jay Zhan + 9 Jonah Gao + 8 Andy Grove + 8 Lordworms + 8 Piotr Findeisen + 8 wiedld + 7 Oleks V + 6 Jax Liu + 5 Alex Huang + 5 Arttu + 5 JasonLi + 5 Trent Hauck + 5 Xin Li + 4 Dharan Aditya + 4 Edmondo Porcu + 4 dependabot[bot] + 4 kamille + 4 yfu + 3 Daniël Heres + 3 Eduard Karacharov + 3 Georgi Krastev + 2 Chris Connelly + 2 Chunchun Ye + 2 June + 2 Marco Neumann + 2 Marko Grujic + 2 Mehmet Ozan Kabak + 2 Michael J Ward + 2 Mohamed Abdeen + 2 Ruihang Xia + 2 Sergei Grebnov + 2 Xiangpeng Hao + 2 jcsherin + 2 kf zheng + 2 mertak-synnada + 1 Adrian Garcia Badaracco + 1 Alexander Rafferty + 1 Alihan Çelikcan + 1 Ariel Marcus + 1 Berkay Şahin + 1 Bruce Ritchie + 1 Devesh Rahatekar + 1 Douglas Anderson + 1 Drew Hayward + 1 Jeffrey Smith II + 1 Kaviraj Kanagaraj + 1 Kezhu Wang + 1 Leonardo Yvens + 1 Lorrens Pantelis + 1 Matthew Cramerus + 1 Matthew Turner + 1 Mustafa Akur + 1 Namgung Chan + 1 Ning Sun + 1 Peter Toth + 1 Qianqian + 1 Samuel Colvin + 1 Shehab Amin + 1 Simon Vandel Sillesen + 1 Tim Saucer + 1 Wendell Smith + 1 Yasser Latreche + 1 Yongting You + 1 danlgrca + 1 tmi + 1 waruto + 1 zhuliquan +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/release/README.md b/dev/release/README.md index c6bc9be2b0db7..1817b3002578a 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -124,20 +124,31 @@ Here are the commands that could be used to prepare the `38.0.0` release: Checkout the main commit to be released -``` +```shell git fetch apache git checkout apache/main ``` -Update datafusion version in `datafusion/Cargo.toml` to `38.0.0`: +Manually update the datafusion version in the root `Cargo.toml` to `38.0.0`. +Run `cargo update` in the root directory and also in `datafusion-cli`: + +```shell +cargo update +cd datafustion-cli +cargo update +cd .. 
``` -./dev/update_datafusion_versions.py 38.0.0 + +Run `cargo test` to re-generate some example files: + +```shell +cargo test ``` Lastly commit the version change: -``` +```shell git commit -a -m 'Update version' ``` @@ -193,7 +204,7 @@ For the release to become "official" it needs at least three PMC members to vote The `dev/release/verify-release-candidate.sh` is a script in this repository that can assist in the verification process. Run it like: -``` +```shell ./dev/release/verify-release-candidate.sh 38.0.0 0 ``` @@ -222,7 +233,7 @@ Congratulations! The release is now official! Tag the same release candidate commit with the final release tag -``` +```shell git co apache/38.0.0-rc0 git tag 38.0.0 git push apache 38.0.0 diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index badd07822ac2e..e0c8391a259a7 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -65,7 +65,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 40.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 41.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | From 193955e0ec4057ab9fc1747ed78b21ddc71a7e25 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 8 Aug 2024 22:38:05 +0200 Subject: [PATCH 250/357] Produce clear error message when build runs with conflicting features (#11895) Throw explicit error message when `cargo build` is invoked with conflicting features, e.g. `cargo test --lib --tests --bins --all-features`. 
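
For reference, the guard added to both benchmark binaries below is the usual `compile_error!` pattern for mutually exclusive Cargo features; the sketch here simply mirrors the hunks in this patch (the two allocator features are the only ones involved, nothing else is assumed):

```rust
// Reject builds that enable both allocator features at compile time,
// instead of letting two `#[global_allocator]` definitions collide later.
#[cfg(all(feature = "snmalloc", feature = "mimalloc"))]
compile_error!(
    "feature \"snmalloc\" and feature \"mimalloc\" cannot be enabled at the same time"
);
```

With this guard in place, an invocation such as `cargo test --lib --tests --bins --all-features` fails immediately with the explicit message above rather than a less obvious allocator conflict error.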
--- benchmarks/src/bin/dfbench.rs | 5 +++++ benchmarks/src/bin/tpch.rs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/benchmarks/src/bin/dfbench.rs b/benchmarks/src/bin/dfbench.rs index 441b6cdc02933..9ce6848a063aa 100644 --- a/benchmarks/src/bin/dfbench.rs +++ b/benchmarks/src/bin/dfbench.rs @@ -20,6 +20,11 @@ use datafusion::error::Result; use structopt::StructOpt; +#[cfg(all(feature = "snmalloc", feature = "mimalloc"))] +compile_error!( + "feature \"snmalloc\" and feature \"mimalloc\" cannot be enabled at the same time" +); + #[cfg(feature = "snmalloc")] #[global_allocator] static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index fc0f4ca0613c1..3270b082cfb43 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -21,6 +21,11 @@ use datafusion::error::Result; use datafusion_benchmarks::tpch; use structopt::StructOpt; +#[cfg(all(feature = "snmalloc", feature = "mimalloc"))] +compile_error!( + "feature \"snmalloc\" and feature \"mimalloc\" cannot be enabled at the same time" +); + #[cfg(feature = "snmalloc")] #[global_allocator] static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; From 368df80b7fa511d59e7612e35f49839eb0249882 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 8 Aug 2024 16:39:05 -0400 Subject: [PATCH 251/357] Add tests for StringView / character functions, fix `regexp_like` and `regexp_match` to work with StringView (#11753) * Minor: Add tests for StringView / character functions * Fix regexp_like and regexp_match to work with StringVeiw * Update for ASCII and BTRIM * Add comment about why it is ok to return boolean with catchall match * Fix character_length * Add ticket references --- datafusion/functions/src/regex/regexplike.rs | 9 +- datafusion/functions/src/regex/regexpmatch.rs | 12 +- .../functions/src/unicode/character_length.rs | 2 +- .../sqllogictest/test_files/string_view.slt | 509 +++++++++++++++--- 4 files changed, 427 insertions(+), 105 deletions(-) diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 09b96a28c1074..20029ba005c49 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -75,13 +75,10 @@ impl ScalarUDFImpl for RegexpLikeFunc { use DataType::*; Ok(match &arg_types[0] { - LargeUtf8 | Utf8 => Boolean, Null => Null, - other => { - return plan_err!( - "The regexp_like function can only accept strings. Got {other}" - ); - } + // Type coercion is done by DataFusion based on signature, so if we + // get here, the first argument is always a string + _ => Boolean, }) } fn invoke(&self, args: &[ColumnarValue]) -> Result { diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index f57d3c17bd72b..764acd7de757d 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -74,17 +74,9 @@ impl ScalarUDFImpl for RegexpMatchFunc { } fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - Ok(match &arg_types[0] { - LargeUtf8 => List(Arc::new(Field::new("item", LargeUtf8, true))), - Utf8 => List(Arc::new(Field::new("item", Utf8, true))), - Null => Null, - other => { - return plan_err!( - "The regexp_match function can only accept strings. 
Got {other}" - ); - } + DataType::Null => DataType::Null, + other => DataType::List(Arc::new(Field::new("item", other.clone(), true))), }) } fn invoke(&self, args: &[ColumnarValue]) -> Result { diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index cee1a57bc6d9d..e46ee162ff12e 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -44,7 +44,7 @@ impl CharacterLengthFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), aliases: vec![String::from("length"), String::from("char_length")], diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index fc10a34256c52..e7166690580f9 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -425,102 +425,26 @@ logical_plan 01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 02)--TableScan: test projection=[column1_utf8view] -statement ok -drop table test; - -# coercion from stringview to integer, as input to make_date -query D -select make_date(arrow_cast('2024', 'Utf8View'), arrow_cast('01', 'Utf8View'), arrow_cast('23', 'Utf8View')) ----- -2024-01-23 - -# coercions between stringview and date types -statement ok -create table dates (dt date) as values - (date '2024-01-23'), - (date '2023-11-30'); - -query D -select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; ----- -2024-01-23 - -statement ok -drop table dates; - -statement ok -create table temp as values -('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')), -('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View')); - -query T -select column2||' is fast' from temp; ----- -rust is fast -datafusion is fast - - -query T -select column2 || ' is ' || column3 from temp; ----- -rust is fast -datafusion is cool +# Ensure string functions use native StringView implementation +# and do not fall back to Utf8 or LargeUtf8 +# Should see no casts to Utf8 in the plans below +## Ensure no casts for LIKE/ILIKE query TT -explain select column2 || 'is' || column3 from temp; ----- -logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8) -02)--TableScan: temp projection=[column2, column3] - - -query TT -explain select column2||' is fast' from temp; +EXPLAIN SELECT + column1_utf8view like 'foo' as "like", + column1_utf8view ilike 'foo' as "ilike" +FROM test; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast") -02)--TableScan: temp projection=[column2] - +01)Projection: test.column1_utf8view LIKE Utf8View("foo") AS like, test.column1_utf8view ILIKE Utf8View("foo") AS ilike +02)--TableScan: test projection=[column1_utf8view] -query T -select column2||column3 from temp; ----- -rustfast -datafusioncool -query TT -explain select column2||column3 from temp; ----- -logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8) -02)--TableScan: temp projection=[column2, column3] -query T -select column2|| ' ' ||column3 from temp; ----- -rust fast -datafusion cool +## Ensure no casts for ASCII ### ASCII -# Setup the initial test 
data -statement ok -create table test_source as values - ('Andrew', 'X'), - ('Xiangpeng', 'Xiangpeng'), - ('Raphael', 'R'), - (NULL, 'R'); - -# Table with the different combination of column types -statement ok -create table test as -SELECT - arrow_cast(column1, 'Utf8') as column1_utf8, - arrow_cast(column2, 'Utf8') as column2_utf8, - arrow_cast(column1, 'LargeUtf8') as column1_large_utf8, - arrow_cast(column2, 'LargeUtf8') as column2_large_utf8, - arrow_cast(column1, 'Utf8View') as column1_utf8view, - arrow_cast(column2, 'Utf8View') as column2_utf8view -FROM test_source; # Test ASCII with utf8view against utf8view, utf8, and largeutf8 # (should be no casts) @@ -594,8 +518,417 @@ SELECT ---- 228 0 NULL +## Ensure no casts for BTRIM +query TT +EXPLAIN SELECT + BTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: btrim(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for CHARACTER_LENGTH +query TT +EXPLAIN SELECT + CHARACTER_LENGTH(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: character_length(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for CONCAT +## TODO https://github.com/apache/datafusion/issues/11836 +query TT +EXPLAIN SELECT + concat(column1_utf8view, column2_utf8view) as c +FROM test; +---- +logical_plan +01)Projection: concat(CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for CONCAT_WS +## TODO https://github.com/apache/datafusion/issues/11837 +query TT +EXPLAIN SELECT + concat_ws(', ', column1_utf8view, column2_utf8view) as c +FROM test; +---- +logical_plan +01)Projection: concat_ws(Utf8(", "), CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for CONTAINS +## TODO https://github.com/apache/datafusion/issues/11838 +query TT +EXPLAIN SELECT + CONTAINS(column1_utf8view, 'foo') as c1, + CONTAINS(column2_utf8view, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: contains(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, contains(__common_expr_1, __common_expr_1) AS c2 +02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for ENDS_WITH +## TODO https://github.com/apache/datafusion/issues/11852 +query TT +EXPLAIN SELECT + ENDS_WITH(column1_utf8view, 'foo') as c1, + ENDS_WITH(column2_utf8view, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2 +02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + + +## Ensure no casts for INITCAP +## TODO https://github.com/apache/datafusion/issues/11853 +query TT +EXPLAIN SELECT + INITCAP(column1_utf8view) as c +FROM test; +---- +logical_plan +01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for LEVENSHTEIN +## TODO https://github.com/apache/datafusion/issues/11854 +query TT 
+EXPLAIN SELECT + levenshtein(column1_utf8view, 'foo') as c1, + levenshtein(column1_utf8view, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: levenshtein(__common_expr_1, Utf8("foo")) AS c1, levenshtein(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for LOWER +## TODO https://github.com/apache/datafusion/issues/11855 +query TT +EXPLAIN SELECT + LOWER(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for LTRIM +## TODO https://github.com/apache/datafusion/issues/11856 +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for LPAD +## TODO https://github.com/apache/datafusion/issues/11857 +query TT +EXPLAIN SELECT + LPAD(column1_utf8view, 12, ' ') as c1 +FROM test; +---- +logical_plan +01)Projection: lpad(CAST(test.column1_utf8view AS Utf8), Int64(12), Utf8(" ")) AS c1 +02)--TableScan: test projection=[column1_utf8view] + + +## Ensure no casts for OCTET_LENGTH +## TODO https://github.com/apache/datafusion/issues/11858 +query TT +EXPLAIN SELECT + OCTET_LENGTH(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for OVERLAY +## TODO file ticket +query TT +EXPLAIN SELECT + OVERLAY(column1_utf8view PLACING 'foo' FROM 2 ) as c1 +FROM test; +---- +logical_plan +01)Projection: overlay(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Int64(2)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for REGEXP_LIKE +query TT +EXPLAIN SELECT + REGEXP_LIKE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k +FROM test; +---- +logical_plan +01)Projection: regexp_like(CAST(test.column1_utf8view AS Utf8), Utf8("^https?://(?:www\.)?([^/]+)/.*$")) AS k +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for REGEXP_MATCH +query TT +EXPLAIN SELECT + REGEXP_MATCH(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k +FROM test; +---- +logical_plan +01)Projection: regexp_match(CAST(test.column1_utf8view AS Utf8), Utf8("^https?://(?:www\.)?([^/]+)/.*$")) AS k +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for REGEXP_REPLACE +query TT +EXPLAIN SELECT + REGEXP_REPLACE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k +FROM test; +---- +logical_plan +01)Projection: regexp_replace(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$"), Utf8("\1")) AS k +02)--TableScan: test projection=[column1_utf8view] + + +## Ensure no casts for REPEAT +## TODO file ticket +query TT +EXPLAIN SELECT + REPEAT(column1_utf8view, 2) as c1 +FROM test; +---- +logical_plan +01)Projection: repeat(CAST(test.column1_utf8view AS Utf8), Int64(2)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for REPLACE +## TODO file ticket +query TT +EXPLAIN SELECT + REPLACE(column1_utf8view, 'foo', 'bar') as c1, + REPLACE(column1_utf8view, column2_utf8view, 'bar') as c2 +FROM test; +---- +logical_plan 
+01)Projection: replace(__common_expr_1, Utf8("foo"), Utf8("bar")) AS c1, replace(__common_expr_1, CAST(test.column2_utf8view AS Utf8), Utf8("bar")) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for REVERSE +## TODO file ticket +query TT +EXPLAIN SELECT + REVERSE(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for RTRIM +## TODO file ticket +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view) as c1, + RTRIM(column1_utf8view, 'foo') as c2 +FROM test; +---- +logical_plan +01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 +03)----TableScan: test projection=[column1_utf8view] + +## Ensure no casts for RIGHT +## TODO file ticket +query TT +EXPLAIN SELECT + RIGHT(column1_utf8view, 3) as c2 +FROM test; +---- +logical_plan +01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2 +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for RPAD +## TODO file ticket +query TT +EXPLAIN SELECT + RPAD(column1_utf8view, 1) as c1, + RPAD(column1_utf8view, 2, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: rpad(__common_expr_1, Int64(1)) AS c1, rpad(__common_expr_1, Int64(2), CAST(test.column2_utf8view AS Utf8)) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + + +## Ensure no casts for RTRIM +## TODO file ticket +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view) as c, + RTRIM(column1_utf8view, column2_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for SPLIT_PART +## TODO file ticket +query TT +EXPLAIN SELECT + SPLIT_PART(column1_utf8view, 'f', 1) as c +FROM test; +---- +logical_plan +01)Projection: split_part(CAST(test.column1_utf8view AS Utf8), Utf8("f"), Int64(1)) AS c +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for STRPOS +## TODO file ticket +query TT +EXPLAIN SELECT + STRPOS(column1_utf8view, 'f') as c, + STRPOS(column1_utf8view, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: strpos(__common_expr_1, Utf8("f")) AS c, strpos(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view +03)----TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for SUBSTR +## TODO file ticket +query TT +EXPLAIN SELECT + SUBSTR(column1_utf8view, 1) as c, + SUBSTR(column1_utf8view, 1 ,2) as c2 +FROM test; +---- +logical_plan +01)Projection: substr(__common_expr_1, Int64(1)) AS c, substr(__common_expr_1, Int64(1), Int64(2)) AS c2 +02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 +03)----TableScan: test projection=[column1_utf8view] + +## Ensure no casts on columns for STARTS_WITH +query TT +EXPLAIN SELECT + STARTS_WITH(column1_utf8view, 
'foo') as c, + STARTS_WITH(column1_utf8view, column2_utf8view) as c2 +FROM test; +---- +logical_plan +01)Projection: starts_with(test.column1_utf8view, Utf8View("foo")) AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] + +## Ensure no casts for TRANSLATE +## TODO file ticket +query TT +EXPLAIN SELECT + TRANSLATE(column1_utf8view, 'foo', 'bar') as c +FROM test; +---- +logical_plan +01)Projection: translate(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Utf8("bar")) AS c +02)--TableScan: test projection=[column1_utf8view] + +## Ensure no casts for FIND_IN_SET +## TODO file ticket +query TT +EXPLAIN SELECT + FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c +FROM test; +---- +logical_plan +01)Projection: find_in_set(CAST(test.column1_utf8view AS Utf8), Utf8("a,b,c,d")) AS c +02)--TableScan: test projection=[column1_utf8view] + + + + statement ok drop table test; +# coercion from stringview to integer, as input to make_date +query D +select make_date(arrow_cast('2024', 'Utf8View'), arrow_cast('01', 'Utf8View'), arrow_cast('23', 'Utf8View')) +---- +2024-01-23 + +# coercions between stringview and date types +statement ok +create table dates (dt date) as values + (date '2024-01-23'), + (date '2023-11-30'); + +query D +select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; +---- +2024-01-23 + +statement ok +drop table dates; + statement ok -drop table test_source; +create table temp as values +('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')), +('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View')); + +query T +select column2||' is fast' from temp; +---- +rust is fast +datafusion is fast + + +query T +select column2 || ' is ' || column3 from temp; +---- +rust is fast +datafusion is cool + +query TT +explain select column2 || 'is' || column3 from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8) +02)--TableScan: temp projection=[column2, column3] + + +query TT +explain select column2||' is fast' from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast") +02)--TableScan: temp projection=[column2] + + +query T +select column2||column3 from temp; +---- +rustfast +datafusioncool + +query TT +explain select column2||column3 from temp; +---- +logical_plan +01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8) +02)--TableScan: temp projection=[column2, column3] + +query T +select column2|| ' ' ||column3 from temp; +---- +rust fast +datafusion cool From 86030a1ff713cc9709a81a1e9df82d4d13b8818d Mon Sep 17 00:00:00 2001 From: yfu Date: Fri, 9 Aug 2024 06:39:19 +1000 Subject: [PATCH 252/357] fix: invalid sqls when unparsing derived table with columns contains calculations, limit/order/distinct (#11756) * Fix unparser derived table with columns include calculations, limit/order/distinct (#24) * compare format output to make sure the two level of projects match * add method to find inner projection that could be nested under limit/order/distinct * use format! 
for matching in unparser sort optimization too * refactor * use to_string and also put comments in * clippy * fix unparser derived table contains cast (#25) * fix unparser derived table contains cast * remove dbg --- datafusion/sql/src/unparser/plan.rs | 67 +------------ datafusion/sql/src/unparser/rewrite.rs | 109 ++++++++++++++++++++-- datafusion/sql/tests/cases/plan_to_sql.rs | 32 +++++++ 3 files changed, 139 insertions(+), 69 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index e08f25d3c27ce..277efd5fe7002 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -30,8 +30,10 @@ use super::{ BuilderError, DerivedRelationBuilder, QueryBuilder, RelationBuilder, SelectBuilder, TableRelationBuilder, TableWithJoinsBuilder, }, - rewrite::normalize_union_schema, - rewrite::rewrite_plan_for_sort_on_non_projected_fields, + rewrite::{ + normalize_union_schema, rewrite_plan_for_sort_on_non_projected_fields, + subquery_alias_inner_query_and_columns, + }, utils::{find_agg_node_within_select, unproject_window_exprs, AggVariant}, Unparser, }; @@ -687,67 +689,6 @@ impl Unparser<'_> { } } -// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of -// subquery -// - `(SELECT column_a as a from table) AS A` -// - `(SELECT column_a from table) AS A (a)` -// -// A roundtrip example for table alias with columns -// -// query: SELECT id FROM (SELECT j1_id from j1) AS c (id) -// -// LogicPlan: -// Projection: c.id -// SubqueryAlias: c -// Projection: j1.j1_id AS id -// Projection: j1.j1_id -// TableScan: j1 -// -// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS -// id FROM (SELECT j1.j1_id FROM j1)) AS c`. -// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table -// `(SELECT j1.j1_id FROM j1)` -// -// With this logic, the unparsed query will be: -// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)` -// -// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)` -// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and -// Column in the Projections. 
Once the parser side is fixed, this logic should work -fn subquery_alias_inner_query_and_columns( - subquery_alias: &datafusion_expr::SubqueryAlias, -) -> (&LogicalPlan, Vec) { - let plan: &LogicalPlan = subquery_alias.input.as_ref(); - - let LogicalPlan::Projection(outer_projections) = plan else { - return (plan, vec![]); - }; - - // check if it's projection inside projection - let LogicalPlan::Projection(inner_projection) = outer_projections.input.as_ref() - else { - return (plan, vec![]); - }; - - let mut columns: Vec = vec![]; - // check if the inner projection and outer projection have a matching pattern like - // Projection: j1.j1_id AS id - // Projection: j1.j1_id - for (i, inner_expr) in inner_projection.expr.iter().enumerate() { - let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else { - return (plan, vec![]); - }; - - if outer_alias.expr.as_ref() != inner_expr { - return (plan, vec![]); - }; - - columns.push(outer_alias.name.as_str().into()); - } - - (outer_projections.input.as_ref(), columns) -} - impl From for DataFusionError { fn from(e: BuilderError) -> Self { DataFusionError::External(Box::new(e)) diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index fba95ad48f32f..f6725485f9208 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -25,6 +25,7 @@ use datafusion_common::{ Result, }; use datafusion_expr::{Expr, LogicalPlan, Projection, Sort}; +use sqlparser::ast::Ident; /// Normalize the schema of a union plan to remove qualifiers from the schema fields and sort expressions. /// @@ -137,14 +138,25 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( let inner_exprs = inner_p .expr .iter() - .map(|f| { - if let Expr::Alias(alias) = f { + .enumerate() + .map(|(i, f)| match f { + Expr::Alias(alias) => { let a = Expr::Column(alias.name.clone().into()); map.insert(a.clone(), f.clone()); a - } else { + } + Expr::Column(_) => { + map.insert( + Expr::Column(inner_p.schema.field(i).name().into()), + f.clone(), + ); f.clone() } + _ => { + let a = Expr::Column(inner_p.schema.field(i).name().into()); + map.insert(a.clone(), f.clone()); + a + } }) .collect::>(); @@ -155,9 +167,17 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( } } - if collects.iter().collect::>() - == inner_exprs.iter().collect::>() - { + // Compare outer collects Expr::to_string with inner collected transformed values + // alias -> alias column + // column -> remain + // others, extract schema field name + let outer_collects = collects.iter().map(Expr::to_string).collect::>(); + let inner_collects = inner_exprs + .iter() + .map(Expr::to_string) + .collect::>(); + + if outer_collects == inner_collects { let mut sort = sort.clone(); let mut inner_p = inner_p.clone(); @@ -175,3 +195,80 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( None } } + +// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of +// subquery +// - `(SELECT column_a as a from table) AS A` +// - `(SELECT column_a from table) AS A (a)` +// +// A roundtrip example for table alias with columns +// +// query: SELECT id FROM (SELECT j1_id from j1) AS c (id) +// +// LogicPlan: +// Projection: c.id +// SubqueryAlias: c +// Projection: j1.j1_id AS id +// Projection: j1.j1_id +// TableScan: j1 +// +// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS +// id FROM (SELECT j1.j1_id FROM j1)) AS c`. 
+// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table +// `(SELECT j1.j1_id FROM j1)` +// +// With this logic, the unparsed query will be: +// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)` +// +// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)` +// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and +// Column in the Projections. Once the parser side is fixed, this logic should work +pub(super) fn subquery_alias_inner_query_and_columns( + subquery_alias: &datafusion_expr::SubqueryAlias, +) -> (&LogicalPlan, Vec) { + let plan: &LogicalPlan = subquery_alias.input.as_ref(); + + let LogicalPlan::Projection(outer_projections) = plan else { + return (plan, vec![]); + }; + + // check if it's projection inside projection + let Some(inner_projection) = find_projection(outer_projections.input.as_ref()) else { + return (plan, vec![]); + }; + + let mut columns: Vec = vec![]; + // check if the inner projection and outer projection have a matching pattern like + // Projection: j1.j1_id AS id + // Projection: j1.j1_id + for (i, inner_expr) in inner_projection.expr.iter().enumerate() { + let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else { + return (plan, vec![]); + }; + + // inner projection schema fields store the projection name which is used in outer + // projection expr + let inner_expr_string = match inner_expr { + Expr::Column(_) => inner_expr.to_string(), + _ => inner_projection.schema.field(i).name().clone(), + }; + + if outer_alias.expr.to_string() != inner_expr_string { + return (plan, vec![]); + }; + + columns.push(outer_alias.name.as_str().into()); + } + + (outer_projections.input.as_ref(), columns) +} + +fn find_projection(logical_plan: &LogicalPlan) -> Option<&Projection> { + match logical_plan { + LogicalPlan::Projection(p) => Some(p), + LogicalPlan::Limit(p) => find_projection(p.input.as_ref()), + LogicalPlan::Distinct(p) => find_projection(p.input().as_ref()), + LogicalPlan::Sort(p) => find_projection(p.input.as_ref()), + _ => None, + } +} diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 2ac3034873363..9bbdbe8dbfc91 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -373,6 +373,38 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, + // Test query that has calculation in derived table with columns + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT j1_id + 1 * 3 from j1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT (j1.j1_id + (1 * 3)) FROM j1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + // Test query that has limit/distinct/order in derived table with columns + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT distinct (j1_id + 1 * 3) FROM j1 LIMIT 1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT DISTINCT (j1.j1_id + (1 * 3)) FROM j1 LIMIT 1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT j1_id + 1 FROM j1 ORDER BY j1_id DESC LIMIT 1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT (j1.j1_id + 1) FROM j1 ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + 
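
To see the fix end to end, here is a minimal round-trip sketch (not part of the patch: the empty `j1` table is created ad hoc rather than taken from the test fixtures, and the `datafusion::sql` re-export of the `datafusion-sql` crate is assumed). It parses one of the new test queries into a `LogicalPlan` and unparses it again with `plan_to_sql`:

    use datafusion::prelude::*;
    use datafusion::sql::unparser::plan_to_sql;

    #[tokio::main]
    async fn main() -> datafusion::error::Result<()> {
        let ctx = SessionContext::new();
        ctx.sql("CREATE TABLE j1 (j1_id INT, j1_string VARCHAR)").await?;

        // A calculation inside a derived table that carries an explicit column alias list.
        let plan = ctx
            .sql("SELECT id FROM (SELECT j1_id + 1 * 3 FROM j1) AS c (id)")
            .await?
            .into_unoptimized_plan();

        // With this fix the alias list survives unparsing, producing SQL along the lines of
        // `SELECT c.id FROM (SELECT (j1.j1_id + (1 * 3)) FROM j1) AS c (id)`.
        println!("{}", plan_to_sql(&plan)?);
        Ok(())
    }
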
unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT CAST((CAST(j1_id as BIGINT) + 1) as int) * 10 FROM j1 LIMIT 1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT (CAST((CAST(j1.j1_id AS BIGINT) + 1) AS INTEGER) * 10) FROM j1 LIMIT 1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT id FROM (SELECT CAST(j1_id as BIGINT) + 1 FROM j1 ORDER BY j1_id LIMIT 1) AS c (id)", + expected: r#"SELECT c.id FROM (SELECT (CAST(j1.j1_id AS BIGINT) + 1) FROM j1 ORDER BY j1.j1_id ASC NULLS LAST LIMIT 1) AS c (id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + } ]; for query in tests { From 1c9583ab95310fb1afa93fec88432ed6536da749 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Fri, 9 Aug 2024 04:39:44 +0800 Subject: [PATCH 253/357] Avoid unecessary copy when reading arrow files (#11840) * avoid copy * fmt --------- Co-authored-by: Andrew Lamb --- .../core/src/datasource/physical_plan/arrow_file.rs | 11 +++++++++-- datafusion/proto-common/src/from_proto/mod.rs | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index a1ee6fbe13412..b4edc221c1f83 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -31,6 +31,7 @@ use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, }; +use arrow::buffer::Buffer; use arrow_ipc::reader::FileDecoder; use arrow_schema::SchemaRef; use datafusion_common::config::ConfigOptions; @@ -296,7 +297,10 @@ impl FileOpener for ArrowOpener { for (dict_block, dict_result) in footer.dictionaries().iter().flatten().zip(dict_results) { - decoder.read_dictionary(dict_block, &dict_result.into())?; + decoder.read_dictionary( + dict_block, + &Buffer::from_bytes(dict_result.into()), + )?; } // filter recordbatches according to range @@ -332,7 +336,10 @@ impl FileOpener for ArrowOpener { .zip(recordbatch_results) .filter_map(move |(block, data)| { decoder - .read_record_batch(&block, &data.into()) + .read_record_batch( + &block, + &Buffer::from_bytes(data.into()), + ) .transpose() }), ) diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 3487f43ae24e9..feb4c11aa8091 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -408,7 +408,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { "Error IPC message while deserializing ScalarValue::List: {e}" )) })?; - let buffer = Buffer::from(arrow_data); + let buffer = Buffer::from(arrow_data.as_slice()); let ipc_batch = message.header_as_record_batch().ok_or_else(|| { Error::General( @@ -423,7 +423,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { "Error IPC message while deserializing ScalarValue::List dictionary message: {e}" )) })?; - let buffer = Buffer::from(arrow_data); + let buffer = Buffer::from(arrow_data.as_slice()); let dict_batch = message.header_as_dictionary_batch().ok_or_else(|| { Error::General( From 56be714ecaf0b0271d1c8a64cbc41d1dbc972228 Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Thu, 8 Aug 2024 13:40:06 -0700 Subject: [PATCH 
254/357] Support NULL literal in Min/Max (#11812) * Support NULL literal in Min/Max * Fix ut * fix fmt --------- Co-authored-by: Andrew Lamb --- datafusion/core/src/dataframe/mod.rs | 5 ++++- datafusion/core/tests/dataframe/describe.rs | 4 ++-- datafusion/functions-aggregate/src/min_max.rs | 2 ++ datafusion/sqllogictest/test_files/aggregate.slt | 6 ++++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index cc1a63cc05f7a..5fa65cb0da428 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -717,7 +717,10 @@ impl DataFrame { { let column = batchs[0].column_by_name(field.name()).unwrap(); - if field.data_type().is_numeric() { + + if column.data_type().is_null() { + Arc::new(StringArray::from(vec!["null"])) + } else if field.data_type().is_numeric() { cast(column, &DataType::Float64)? } else { cast(column, &DataType::Utf8)? diff --git a/datafusion/core/tests/dataframe/describe.rs b/datafusion/core/tests/dataframe/describe.rs index e446d71473be1..9321481efbd2e 100644 --- a/datafusion/core/tests/dataframe/describe.rs +++ b/datafusion/core/tests/dataframe/describe.rs @@ -102,8 +102,8 @@ async fn describe_null() -> Result<()> { "| null_count | 0 | 1 |", "| mean | null | null |", "| std | null | null |", - "| min | null | null |", - "| max | null | null |", + "| min | a | null |", + "| max | a | null |", "| median | null | null |", "+------------+------+------+" ]; diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 18028e358b211..f19d6d767ba11 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -304,6 +304,7 @@ macro_rules! typed_min_max_batch { macro_rules! min_max_batch { ($VALUES:expr, $OP:ident) => {{ match $VALUES.data_type() { + DataType::Null => ScalarValue::Null, DataType::Decimal128(precision, scale) => { typed_min_max_batch!( $VALUES, @@ -579,6 +580,7 @@ macro_rules! interval_min_max { macro_rules! min_max { ($VALUE:expr, $DELTA:expr, $OP:ident) => {{ Ok(match ($VALUE, $DELTA) { + (ScalarValue::Null, ScalarValue::Null) => ScalarValue::Null, ( lhs @ ScalarValue::Decimal128(lhsv, lhsp, lhss), rhs @ ScalarValue::Decimal128(rhsv, rhsp, rhss) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 8a5222143356f..c68a6c345caa9 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5548,3 +5548,9 @@ set datafusion.explain.logical_plan_only = false; statement ok drop table employee_csv; + +# test null literal handling in supported aggregate functions +query I??III?T +select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(NULL), nth_value(NULL, 1), string_agg(NULL, ','); +---- +0 NULL NULL NULL NULL NULL NULL NULL \ No newline at end of file From 20fbd88521a9687e0bf2db6c68d83955fb46e723 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 8 Aug 2024 22:40:30 +0200 Subject: [PATCH 255/357] Remove many `crate::` imports in listing table provider module (#11887) This is part of isolating this module in order to be able to move it out of core. This commit attempts to replace all `crate::` imports that are possible to avoid today (i.e. which would be replaced when listing table provider was moved to separate crate), leaving those that cannot be replaced. 
This makes it easy to notice the remaining coupling between the listing table provider module and the core. --- .../core/src/datasource/listing/helpers.rs | 8 +++--- datafusion/core/src/datasource/listing/mod.rs | 4 +-- .../core/src/datasource/listing/table.rs | 25 ++++++++----------- datafusion/core/src/datasource/listing/url.rs | 2 +- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 67af8ef12c8b5..b5dd2dd12e10a 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -21,11 +21,11 @@ use std::collections::HashMap; use std::mem; use std::sync::Arc; +use super::ListingTableUrl; use super::PartitionedFile; -use crate::datasource::listing::ListingTableUrl; use crate::execution::context::SessionState; -use crate::logical_expr::{BinaryExpr, Operator}; -use crate::{error::Result, scalar::ScalarValue}; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{BinaryExpr, Operator}; use arrow::{ array::{Array, ArrayRef, AsArray, StringBuilder}, @@ -518,8 +518,8 @@ mod tests { use futures::StreamExt; - use crate::logical_expr::{case, col, lit, Expr}; use crate::test::object_store::make_test_store_and_state; + use datafusion_expr::{case, col, lit, Expr}; use super::*; diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index 21a60614cff2d..c5a441aacf1d2 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -22,8 +22,8 @@ mod helpers; mod table; mod url; -use crate::error::Result; use chrono::TimeZone; +use datafusion_common::Result; use datafusion_common::{ScalarValue, Statistics}; use futures::Stream; use object_store::{path::Path, ObjectMeta}; @@ -162,7 +162,7 @@ impl From for PartitionedFile { #[cfg(test)] mod tests { - use crate::datasource::listing::ListingTableUrl; + use super::ListingTableUrl; use datafusion_execution::object_store::{ DefaultObjectStoreRegistry, ObjectStoreRegistry, }; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index bb86ac3ae4161..89066d8234acc 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -24,19 +24,18 @@ use std::{any::Any, sync::Arc}; use super::helpers::{expr_applicable_for_cols, pruned_partition_list, split_files}; use super::PartitionedFile; -use crate::catalog::TableProvider; -use crate::datasource::{create_ordering, get_statistics_with_limit, TableType}; +use super::ListingTableUrl; +use crate::datasource::{create_ordering, get_statistics_with_limit}; use crate::datasource::{ file_format::{file_compression_type::FileCompressionType, FileFormat}, - listing::ListingTableUrl, physical_plan::{FileScanConfig, FileSinkConfig}, }; -use crate::{ - error::{DataFusionError, Result}, - execution::context::SessionState, - logical_expr::{utils::conjunction, Expr, TableProviderFilterPushDown}, - physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}, -}; +use crate::execution::context::SessionState; +use datafusion_catalog::TableProvider; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::TableType; +use datafusion_expr::{utils::conjunction, Expr, TableProviderFilterPushDown}; +use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; use arrow::datatypes::{DataType, Field, 
SchemaBuilder, SchemaRef}; use arrow_schema::Schema; @@ -1051,12 +1050,12 @@ mod tests { use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::{provider_as_source, MemTable}; use crate::execution::options::ArrowReadOptions; - use crate::physical_plan::collect; use crate::prelude::*; use crate::{ assert_batches_eq, test::{columns, object_store::register_test_store}, }; + use datafusion_physical_plan::collect; use arrow::record_batch::RecordBatch; use arrow_schema::SortOptions; @@ -1154,10 +1153,8 @@ mod tests { let options = ListingOptions::new(Arc::new(ParquetFormat::default())); let schema = options.infer_schema(&state, &table_path).await.unwrap(); - use crate::{ - datasource::file_format::parquet::ParquetFormat, - physical_plan::expressions::col as physical_col, - }; + use crate::datasource::file_format::parquet::ParquetFormat; + use datafusion_physical_plan::expressions::col as physical_col; use std::ops::Add; // (file_sort_order, expected_result) diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 7566df628ed71..1701707fdb726 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::datasource::object_store::ObjectStoreUrl; use crate::execution::context::SessionState; use datafusion_common::{DataFusionError, Result}; +use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_optimizer::OptimizerConfig; use futures::stream::BoxStream; use futures::{StreamExt, TryStreamExt}; From 56f8e3527b78282511e51701223971f04244c8cf Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Thu, 8 Aug 2024 15:41:27 -0500 Subject: [PATCH 256/357] feat: expose centroids in approx_percentile_cont fluent api (#11878) * feat: expose centroids in approx_percentile_count fluent api Closes https://github.com/apache/datafusion/issues/11877 * avoid repeated import prefix in function signature * update test_fn_approx_percentile_cont so that adjusting centroids changes the result --- .../tests/dataframe/dataframe_functions.rs | 19 ++++++++++++++-- .../src/approx_percentile_cont.rs | 22 +++++++++++++------ .../tests/cases/roundtrip_logical_plan.rs | 3 ++- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 7a0e9888a61c1..1bd90fce839d0 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -360,7 +360,7 @@ async fn test_fn_approx_median() -> Result<()> { #[tokio::test] async fn test_fn_approx_percentile_cont() -> Result<()> { - let expr = approx_percentile_cont(col("b"), lit(0.5)); + let expr = approx_percentile_cont(col("b"), lit(0.5), None); let expected = [ "+---------------------------------------------+", @@ -381,7 +381,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> { None::<&str>, "arg_2".to_string(), )); - let expr = approx_percentile_cont(col("b"), alias_expr); + let expr = approx_percentile_cont(col("b"), alias_expr, None); let df = create_test_table().await?; let expected = [ "+--------------------------------------+", @@ -394,6 +394,21 @@ async fn test_fn_approx_percentile_cont() -> Result<()> { assert_batches_eq!(expected, &batches); + // with number of centroids set + let expr = approx_percentile_cont(col("b"), lit(0.5), 
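
For context, the updated fluent API takes the number of t-digest centroids as an optional third argument. A small usage sketch (not part of the patch; the import path via `datafusion::functions_aggregate::expr_fn` and the ad-hoc `test` table are assumptions):

    use datafusion::functions_aggregate::expr_fn::approx_percentile_cont;
    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> datafusion::error::Result<()> {
        let ctx = SessionContext::new();
        ctx.sql("CREATE TABLE test(b INT) AS VALUES (10), (20), (30), (40), (100)")
            .await?;

        let df = ctx.table("test").await?;
        df.aggregate(
            vec![],
            vec![
                // None keeps the default t-digest size.
                approx_percentile_cont(col("b"), lit(0.5), None),
                // Some(lit(2)) limits the sketch to two centroids: less memory, lower accuracy.
                approx_percentile_cont(col("b"), lit(0.5), Some(lit(2))),
            ],
        )?
        .show()
        .await?;
        Ok(())
    }
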
Some(lit(2))); + let expected = [ + "+------------------------------------------------------+", + "| approx_percentile_cont(test.b,Float64(0.5),Int32(2)) |", + "+------------------------------------------------------+", + "| 30 |", + "+------------------------------------------------------+", + ]; + + let df = create_test_table().await?; + let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?; + + assert_batches_eq!(expected, &batches); + Ok(()) } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index af2a26fd05ece..ffa623c13b0bf 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -46,13 +46,21 @@ use datafusion_physical_expr_common::aggregate::tdigest::{ }; use datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema; -make_udaf_expr_and_func!( - ApproxPercentileCont, - approx_percentile_cont, - expression percentile, - "Computes the approximate percentile continuous of a set of numbers", - approx_percentile_cont_udaf -); +create_func!(ApproxPercentileCont, approx_percentile_cont_udaf); + +/// Computes the approximate percentile continuous of a set of numbers +pub fn approx_percentile_cont( + expression: Expr, + percentile: Expr, + centroids: Option, +) -> Expr { + let args = if let Some(centroids) = centroids { + vec![expression, percentile, centroids] + } else { + vec![expression, percentile] + }; + approx_percentile_cont_udaf().call(args) +} pub struct ApproxPercentileCont { signature: Signature, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index e5c226418441a..a18fa03b2d151 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -887,7 +887,8 @@ async fn roundtrip_expr_api() -> Result<()> { stddev_pop(lit(2.2)), approx_distinct(lit(2)), approx_median(lit(2)), - approx_percentile_cont(lit(2), lit(0.5)), + approx_percentile_cont(lit(2), lit(0.5), None), + approx_percentile_cont(lit(2), lit(0.5), Some(lit(50))), approx_percentile_cont_with_weight(lit(2), lit(1), lit(0.5)), grouping(lit(1)), bit_and(lit(2)), From 7c41323fa46963a6889adafd5d4a8e43c69291b2 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 9 Aug 2024 09:00:02 +0800 Subject: [PATCH 257/357] Rename `Expr::display_name` to `Expr::schema_name`, make `UNNEST` naming conform to convention (#11797) * introduce schema_name Signed-off-by: jayzhan211 * fix Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * fix doc Signed-off-by: jayzhan211 * reuse for simple case Signed-off-by: jayzhan211 * unnest + udf Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * add display name for udf Signed-off-by: jayzhan211 * fix name in udf Signed-off-by: jayzhan211 * rename unnest Signed-off-by: jayzhan211 * rm column Signed-off-by: jayzhan211 * inlis Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * udaf Signed-off-by: jayzhan211 * case Signed-off-by: jayzhan211 * use write Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * fix window Signed-off-by: jayzhan211 * window Signed-off-by: jayzhan211 * like and similar to Signed-off-by: jayzhan211 * rm display name Signed-off-by: jayzhan211 * comment Signed-off-by: jayzhan211 * cliip Signed-off-by: jayzhan211 * fix doc Signed-off-by: 
jayzhan211 * display Signed-off-by: jayzhan211 * fix conflict Signed-off-by: jayzhan211 * fix merge conflict Signed-off-by: jayzhan211 * with display Signed-off-by: jayzhan211 * function for exprs Signed-off-by: jayzhan211 * clippy Signed-off-by: jayzhan211 * fix doc Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/core/src/physical_planner.rs | 4 +- datafusion/expr/src/expr.rs | 617 +++++++++--------- datafusion/expr/src/expr_rewriter/mod.rs | 16 +- datafusion/expr/src/expr_rewriter/order_by.rs | 4 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/logical_plan/builder.rs | 10 +- datafusion/expr/src/logical_plan/plan.rs | 4 +- datafusion/expr/src/udf.rs | 24 +- datafusion/expr/src/utils.rs | 4 +- datafusion/functions-nested/src/expr_ext.rs | 4 +- datafusion/functions-nested/src/extract.rs | 51 +- datafusion/functions-nested/src/utils.rs | 7 +- datafusion/functions/src/core/expr_ext.rs | 2 +- datafusion/functions/src/core/getfield.rs | 22 +- .../optimizer/src/analyzer/type_coercion.rs | 14 +- .../optimizer/src/common_subexpr_eliminate.rs | 2 +- datafusion/optimizer/src/decorrelate.rs | 9 +- .../optimizer/src/optimize_projections/mod.rs | 8 +- datafusion/optimizer/src/push_down_filter.rs | 4 +- .../optimizer/src/scalar_subquery_to_join.rs | 4 +- .../src/single_distinct_to_groupby.rs | 2 +- datafusion/proto/tests/cases/serialize.rs | 2 +- datafusion/sql/src/unparser/utils.rs | 2 +- datafusion/sql/src/utils.rs | 26 +- datafusion/sql/tests/cases/plan_to_sql.rs | 8 +- datafusion/sqllogictest/test_files/array.slt | 4 +- .../test_files/push_down_filter.slt | 40 +- datafusion/sqllogictest/test_files/unnest.slt | 18 +- .../substrait/src/logical_plan/consumer.rs | 2 +- .../tests/cases/consumer_integration.rs | 4 +- 30 files changed, 494 insertions(+), 426 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 58b02c08e34cd..ab0765ac0deb7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1529,7 +1529,7 @@ pub fn create_window_expr( // unpack aliased logical expressions, e.g. "sum(col) over () as total" let (name, e) = match e { Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()), - _ => (e.display_name()?, e), + _ => (e.schema_name().to_string(), e), }; create_window_expr_with_name(e, name, logical_schema, execution_props) } @@ -1620,7 +1620,7 @@ pub fn create_aggregate_expr_and_maybe_filter( // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" let (name, e) = match e { Expr::Alias(Alias { expr, name, .. }) => (Some(name.clone()), expr.as_ref()), - Expr::AggregateFunction(_) => (e.display_name().ok(), e), + Expr::AggregateFunction(_) => (Some(e.schema_name().to_string()), e), _ => (None, e), }; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index edf45a244e1f2..5030a95d3c8ab 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -983,10 +983,35 @@ impl PartialOrd for Expr { } impl Expr { - /// Returns the name of this expression as it should appear in a schema. This name - /// will not include any CAST expressions. + #[deprecated(since = "40.0.0", note = "use schema_name instead")] pub fn display_name(&self) -> Result { - create_name(self) + Ok(self.schema_name().to_string()) + } + + /// The name of the column (field) that this `Expr` will produce. + /// + /// For example, for a projection (e.g. 
`SELECT `) the resulting arrow + /// [`Schema`] will have a field with this name. + /// + /// Note that the resulting string is subtlety different than the `Display` + /// representation for certain `Expr`. Some differences: + /// + /// 1. [`Expr::Alias`], which shows only the alias itself + /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression + /// + /// # Example + /// ``` + /// # use datafusion_expr::{col, lit}; + /// let expr = col("foo").eq(lit(42)); + /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string()); + /// + /// let expr = col("foo").alias("bar").eq(lit(11)); + /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string()); + /// ``` + /// + /// [`Schema`]: arrow::datatypes::Schema + pub fn schema_name(&self) -> impl Display + '_ { + SchemaDisplay(self) } /// Returns a full and complete string representation of this expression. @@ -1119,7 +1144,7 @@ impl Expr { match self { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), - expr => expr.display_name(), + expr => Ok(expr.schema_name().to_string()), } } @@ -1127,7 +1152,6 @@ impl Expr { /// alias if necessary. pub fn alias_if_changed(self, original_name: String) -> Result { let new_name = self.name_for_alias()?; - if new_name == original_name { return Ok(self); } @@ -1749,6 +1773,287 @@ macro_rules! expr_vec_fmt { }}; } +struct SchemaDisplay<'a>(&'a Expr); +impl<'a> Display for SchemaDisplay<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self.0 { + // The same as Display + Expr::Column(_) + | Expr::Literal(_) + | Expr::ScalarVariable(..) + | Expr::Sort(_) + | Expr::OuterReferenceColumn(..) + | Expr::Placeholder(_) + | Expr::Wildcard { .. } => write!(f, "{}", self.0), + + Expr::AggregateFunction(AggregateFunction { + func, + args, + distinct, + filter, + order_by, + null_treatment, + }) => { + write!( + f, + "{}({}{})", + func.name(), + if *distinct { "DISTINCT " } else { "" }, + schema_name_from_exprs_comma_seperated_without_space(args)? + )?; + + if let Some(null_treatment) = null_treatment { + write!(f, " {}", null_treatment)?; + } + + if let Some(filter) = filter { + write!(f, " FILTER (WHERE {filter})")?; + }; + + if let Some(order_by) = order_by { + write!(f, " ORDER BY [{}]", schema_name_from_exprs(order_by)?)?; + }; + + Ok(()) + } + // expr is not shown since it is aliased + Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), + Expr::Between(Between { + expr, + negated, + low, + high, + }) => { + if *negated { + write!( + f, + "{} NOT BETWEEN {} AND {}", + SchemaDisplay(expr), + SchemaDisplay(low), + SchemaDisplay(high), + ) + } else { + write!( + f, + "{} BETWEEN {} AND {}", + SchemaDisplay(expr), + SchemaDisplay(low), + SchemaDisplay(high), + ) + } + } + Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),) + } + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + write!(f, "CASE ")?; + + if let Some(e) = expr { + write!(f, "{} ", SchemaDisplay(e))?; + } + + for (when, then) in when_then_expr { + write!( + f, + "WHEN {} THEN {} ", + SchemaDisplay(when), + SchemaDisplay(then), + )?; + } + + if let Some(e) = else_expr { + write!(f, "ELSE {} ", SchemaDisplay(e))?; + } + + write!(f, "END") + } + // cast expr is not shown to be consistant with Postgres and Spark + Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. 
}) => { + write!(f, "{}", SchemaDisplay(expr)) + } + Expr::InList(InList { + expr, + list, + negated, + }) => { + let inlist_name = schema_name_from_exprs(list)?; + + if *negated { + write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name) + } else { + write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name) + } + } + Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"), + Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"), + Expr::GroupingSet(GroupingSet::Cube(exprs)) => { + write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) + } + Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { + write!(f, "GROUPING SETS (")?; + for exprs in lists_of_exprs.iter() { + write!(f, "({})", schema_name_from_exprs(exprs)?)?; + } + write!(f, ")") + } + Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { + write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) + } + Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)), + Expr::IsNotNull(expr) => { + write!(f, "{} IS NOT NULL", SchemaDisplay(expr)) + } + Expr::IsUnknown(expr) => { + write!(f, "{} IS UNKNOWN", SchemaDisplay(expr)) + } + Expr::IsNotUnknown(expr) => { + write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr)) + } + Expr::InSubquery(InSubquery { negated: true, .. }) => { + write!(f, "NOT IN") + } + Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"), + Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)), + Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)), + Expr::IsNotTrue(expr) => { + write!(f, "{} IS NOT TRUE", SchemaDisplay(expr)) + } + Expr::IsNotFalse(expr) => { + write!(f, "{} IS NOT FALSE", SchemaDisplay(expr)) + } + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => { + write!( + f, + "{} {}{} {}", + SchemaDisplay(expr), + if *negated { "NOT " } else { "" }, + if *case_insensitive { "ILIKE" } else { "LIKE" }, + SchemaDisplay(pattern), + )?; + + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) + } + Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)), + Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)), + Expr::Unnest(Unnest { expr }) => { + write!(f, "UNNEST({})", SchemaDisplay(expr)) + } + Expr::ScalarFunction(ScalarFunction { func, args }) => { + match func.schema_name(args) { + Ok(name) => { + write!(f, "{name}") + } + Err(e) => { + write!(f, "got error from schema_name {}", e) + } + } + } + Expr::ScalarSubquery(Subquery { subquery, .. }) => { + write!(f, "{}", subquery.schema().field(0).name()) + } + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + .. + }) => { + write!( + f, + "{} {} {}", + SchemaDisplay(expr), + if *negated { + "NOT SIMILAR TO" + } else { + "SIMILAR TO" + }, + SchemaDisplay(pattern), + )?; + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) + } + Expr::WindowFunction(WindowFunction { + fun, + args, + partition_by, + order_by, + window_frame, + null_treatment, + }) => { + write!( + f, + "{}({})", + fun, + schema_name_from_exprs_comma_seperated_without_space(args)? + )?; + + if let Some(null_treatment) = null_treatment { + write!(f, " {}", null_treatment)?; + } + + if !partition_by.is_empty() { + write!( + f, + " PARTITION BY [{}]", + schema_name_from_exprs(partition_by)? 
+ )?; + } + + if !order_by.is_empty() { + write!(f, " ORDER BY [{}]", schema_name_from_exprs(order_by)?)?; + }; + + write!(f, " {window_frame}") + } + } + } +} + +/// Get schema_name for Vector of expressions +/// +/// Internal usage. Please call `schema_name_from_exprs` instead +// TODO: Use ", " to standardize the formatting of Vec, +// +pub(crate) fn schema_name_from_exprs_comma_seperated_without_space( + exprs: &[Expr], +) -> Result { + schema_name_from_exprs_inner(exprs, ",") +} + +/// Get schema_name for Vector of expressions +pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { + schema_name_from_exprs_inner(exprs, ", ") +} + +fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { + let mut s = String::new(); + for (i, e) in exprs.iter().enumerate() { + if i > 0 { + write!(&mut s, "{sep}")?; + } + write!(&mut s, "{}", SchemaDisplay(e))?; + } + + Ok(s) +} + /// Format expressions for display as part of a logical plan. In many cases, this will produce /// similar output to `Expr.name()` except that column names will be prefixed with '#'. impl fmt::Display for Expr { @@ -1827,6 +2132,10 @@ impl fmt::Display for Expr { Expr::ScalarFunction(fun) => { fmt_function(f, fun.name(), false, &fun.args, true) } + // TODO: use udf's display_name, need to fix the seperator issue, + // Expr::ScalarFunction(ScalarFunction { func, args }) => { + // write!(f, "{}", func.display_name(args).unwrap()) + // } Expr::WindowFunction(WindowFunction { fun, args, @@ -1961,6 +2270,7 @@ impl fmt::Display for Expr { }, Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"), Expr::Unnest(Unnest { expr }) => { + // TODO: use Display instead of Debug, there is non-unique expression name in projection issue. write!(f, "UNNEST({expr:?})") } } @@ -1979,7 +2289,6 @@ fn fmt_function( false => args.iter().map(|arg| format!("{arg:?}")).collect(), }; - // let args: Vec = args.iter().map(|arg| format!("{:?}", arg)).collect(); let distinct_str = match distinct { true => "DISTINCT ", false => "", @@ -1987,297 +2296,6 @@ fn fmt_function( write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) } -fn write_function_name( - w: &mut W, - fun: &str, - distinct: bool, - args: &[Expr], -) -> Result<()> { - write!(w, "{}(", fun)?; - if distinct { - w.write_str("DISTINCT ")?; - } - write_names_join(w, args, ",")?; - w.write_str(")")?; - Ok(()) -} - -/// Returns a readable name of an expression based on the input schema. -/// This function recursively transverses the expression for names such as "CAST(a > 2)". -pub(crate) fn create_name(e: &Expr) -> Result { - let mut s = String::new(); - write_name(&mut s, e)?; - Ok(s) -} - -fn write_name(w: &mut W, e: &Expr) -> Result<()> { - match e { - Expr::Alias(Alias { name, .. }) => write!(w, "{}", name)?, - Expr::Column(c) => write!(w, "{}", c.flat_name())?, - Expr::OuterReferenceColumn(_, c) => write!(w, "outer_ref({})", c.flat_name())?, - Expr::ScalarVariable(_, variable_names) => { - write!(w, "{}", variable_names.join("."))? 
- } - Expr::Literal(value) => write!(w, "{value:?}")?, - Expr::BinaryExpr(binary_expr) => { - write_name(w, binary_expr.left.as_ref())?; - write!(w, " {} ", binary_expr.op)?; - write_name(w, binary_expr.right.as_ref())?; - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - write!( - w, - "{} {}{} {}", - expr, - if *negated { "NOT " } else { "" }, - if *case_insensitive { "ILIKE" } else { "LIKE" }, - pattern, - )?; - if let Some(char) = escape_char { - write!(w, " CHAR '{char}'")?; - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - write!( - w, - "{} {} {}", - expr, - if *negated { - "NOT SIMILAR TO" - } else { - "SIMILAR TO" - }, - pattern, - )?; - if let Some(char) = escape_char { - write!(w, " CHAR '{char}'")?; - } - } - Expr::Case(case) => { - write!(w, "CASE ")?; - if let Some(e) = &case.expr { - write_name(w, e)?; - w.write_str(" ")?; - } - for (when, then) in &case.when_then_expr { - w.write_str("WHEN ")?; - write_name(w, when)?; - w.write_str(" THEN ")?; - write_name(w, then)?; - w.write_str(" ")?; - } - if let Some(e) = &case.else_expr { - w.write_str("ELSE ")?; - write_name(w, e)?; - w.write_str(" ")?; - } - w.write_str("END")?; - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - write_name(w, expr)?; - } - Expr::TryCast(TryCast { expr, .. }) => { - // CAST does not change the expression name - write_name(w, expr)?; - } - Expr::Not(expr) => { - w.write_str("NOT ")?; - write_name(w, expr)?; - } - Expr::Negative(expr) => { - w.write_str("(- ")?; - write_name(w, expr)?; - w.write_str(")")?; - } - Expr::IsNull(expr) => { - write_name(w, expr)?; - w.write_str(" IS NULL")?; - } - Expr::IsNotNull(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT NULL")?; - } - Expr::IsTrue(expr) => { - write_name(w, expr)?; - w.write_str(" IS TRUE")?; - } - Expr::IsFalse(expr) => { - write_name(w, expr)?; - w.write_str(" IS FALSE")?; - } - Expr::IsUnknown(expr) => { - write_name(w, expr)?; - w.write_str(" IS UNKNOWN")?; - } - Expr::IsNotTrue(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT TRUE")?; - } - Expr::IsNotFalse(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT FALSE")?; - } - Expr::IsNotUnknown(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT UNKNOWN")?; - } - Expr::Exists(Exists { negated: true, .. }) => w.write_str("NOT EXISTS")?, - Expr::Exists(Exists { negated: false, .. }) => w.write_str("EXISTS")?, - Expr::InSubquery(InSubquery { negated: true, .. }) => w.write_str("NOT IN")?, - Expr::InSubquery(InSubquery { negated: false, .. 
}) => w.write_str("IN")?, - Expr::ScalarSubquery(subquery) => { - w.write_str(subquery.subquery.schema().field(0).name().as_str())?; - } - Expr::Unnest(Unnest { expr }) => { - w.write_str("unnest(")?; - write_name(w, expr)?; - w.write_str(")")?; - } - Expr::ScalarFunction(fun) => { - w.write_str(fun.func.display_name(&fun.args)?.as_str())?; - } - Expr::WindowFunction(WindowFunction { - fun, - args, - window_frame, - partition_by, - order_by, - null_treatment, - }) => { - write_function_name(w, &fun.to_string(), false, args)?; - - if let Some(nt) = null_treatment { - w.write_str(" ")?; - write!(w, "{}", nt)?; - } - if !partition_by.is_empty() { - w.write_str(" ")?; - write!(w, "PARTITION BY [{}]", expr_vec_fmt!(partition_by))?; - } - if !order_by.is_empty() { - w.write_str(" ")?; - write!(w, "ORDER BY [{}]", expr_vec_fmt!(order_by))?; - } - w.write_str(" ")?; - write!(w, "{window_frame}")?; - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter, - order_by, - null_treatment, - }) => { - write_function_name(w, func.name(), *distinct, args)?; - if let Some(fe) = filter { - write!(w, " FILTER (WHERE {fe})")?; - }; - if let Some(order_by) = order_by { - write!(w, " ORDER BY [{}]", expr_vec_fmt!(order_by))?; - }; - if let Some(nt) = null_treatment { - write!(w, " {}", nt)?; - } - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => { - write!(w, "ROLLUP (")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - GroupingSet::Cube(exprs) => { - write!(w, "CUBE (")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - GroupingSet::GroupingSets(lists_of_exprs) => { - write!(w, "GROUPING SETS (")?; - for (i, exprs) in lists_of_exprs.iter().enumerate() { - if i != 0 { - write!(w, ", ")?; - } - write!(w, "(")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - write!(w, ")")?; - } - }, - Expr::InList(InList { - expr, - list, - negated, - }) => { - write_name(w, expr)?; - let list = list.iter().map(create_name); - if *negated { - write!(w, " NOT IN ({list:?})")?; - } else { - write!(w, " IN ({list:?})")?; - } - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - write_name(w, expr)?; - if *negated { - write!(w, " NOT BETWEEN ")?; - } else { - write!(w, " BETWEEN ")?; - } - write_name(w, low)?; - write!(w, " AND ")?; - write_name(w, high)?; - } - Expr::Sort { .. } => { - return internal_err!("Create name does not support sort expression") - } - Expr::Wildcard { qualifier } => match qualifier { - Some(qualifier) => { - return internal_err!( - "Create name does not support qualified wildcard, got {qualifier}" - ) - } - None => write!(w, "*")?, - }, - Expr::Placeholder(Placeholder { id, .. 
}) => write!(w, "{}", id)?, - }; - Ok(()) -} - -fn write_names(w: &mut W, exprs: &[Expr]) -> Result<()> { - exprs.iter().try_for_each(|e| write_name(w, e)) -} - -fn write_names_join(w: &mut W, exprs: &[Expr], sep: &str) -> Result<()> { - let mut iter = exprs.iter(); - if let Some(first_arg) = iter.next() { - write_name(w, first_arg)?; - } - for a in iter { - w.write_str(sep)?; - write_name(w, a)?; - } - Ok(()) -} - pub fn create_function_physical_name( fun: &str, distinct: bool, @@ -2394,7 +2412,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{expr} IS NOT UNKNOWN")) } - Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args), + Expr::ScalarFunction(fun) => fun.func.schema_name(&fun.args), Expr::WindowFunction(WindowFunction { fun, args, @@ -2552,7 +2570,6 @@ mod test { let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END"; assert_eq!(expected, expr.canonical_name()); assert_eq!(expected, format!("{expr}")); - assert_eq!(expected, expr.display_name()?); Ok(()) } @@ -2567,7 +2584,7 @@ mod test { assert_eq!(expected_canonical, format!("{expr}")); // note that CAST intentionally has a name that is different from its `Display` // representation. CAST does not change the name of expressions. - assert_eq!("Float32(1.23)", expr.display_name()?); + assert_eq!("Float32(1.23)", expr.schema_name().to_string()); Ok(()) } diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index bf2bfe2c39326..0dc41d4a9ac1a 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -173,7 +173,7 @@ pub fn create_col_from_scalar_expr( name, )), _ => { - let scalar_column = scalar_expr.display_name()?; + let scalar_column = scalar_expr.schema_name().to_string(); Ok(Column::new( Some::(subqry_alias.into()), scalar_column, @@ -475,16 +475,14 @@ mod test { let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap(); let original_name = match &expr_from { - Expr::Sort(Sort { expr, .. }) => expr.display_name(), - expr => expr.display_name(), - } - .unwrap(); + Expr::Sort(Sort { expr, .. }) => expr.schema_name().to_string(), + expr => expr.schema_name().to_string(), + }; let new_name = match &expr { - Expr::Sort(Sort { expr, .. }) => expr.display_name(), - expr => expr.display_name(), - } - .unwrap(); + Expr::Sort(Sort { expr, .. 
}) => expr.schema_name().to_string(), + expr => expr.schema_name().to_string(), + }; assert_eq!( original_name, new_name, diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index 3d79caa21fde3..bbb855801c3ea 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -108,8 +108,8 @@ fn rewrite_in_terms_of_projection( }; // expr is an actual expr like min(t.c2), but we are looking - // for a column with the same "min(C2)", so translate there - let name = normalized_expr.display_name()?; + // for a column with the same "MIN(C2)", so translate there + let name = normalized_expr.schema_name().to_string(); let search_col = Expr::Column(Column { relation: None, diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 9faeb8aed506c..73123819ba99a 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -474,7 +474,7 @@ impl ExprSchemable for Expr { let (data_type, nullable) = self.data_type_and_nullable(input_schema)?; Ok(( None, - Field::new(self.display_name()?, data_type, nullable) + Field::new(self.schema_name().to_string(), data_type, nullable) .with_metadata(self.metadata(input_schema)?) .into(), )) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index aa2ea4ae1c266..4ef346656ff40 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1298,15 +1298,15 @@ fn add_group_by_exprs_from_dependencies( // c1 + 1` produces an output field named `"c1 + 1"` let mut group_by_field_names = group_expr .iter() - .map(|e| e.display_name()) - .collect::>>()?; + .map(|e| e.schema_name().to_string()) + .collect::>(); if let Some(target_indices) = get_target_functional_dependencies(schema, &group_by_field_names) { for idx in target_indices { let expr = Expr::Column(Column::from(schema.qualified_field(idx))); - let expr_name = expr.display_name()?; + let expr_name = expr.schema_name().to_string(); if !group_by_field_names.contains(&expr_name) { group_by_field_names.push(expr_name); group_expr.push(expr); @@ -1323,7 +1323,7 @@ pub(crate) fn validate_unique_names<'a>( let mut unique_names = HashMap::new(); expressions.into_iter().enumerate().try_for_each(|(position, expr)| { - let name = expr.display_name()?; + let name = expr.schema_name().to_string(); match unique_names.get(&name) { None => { unique_names.insert(name, (position, expr)); @@ -1557,7 +1557,7 @@ pub fn wrap_projection_for_join_if_necessary( if let Some(col) = key.try_as_col() { Ok(col.clone()) } else { - let name = key.display_name()?; + let name = key.schema_name().to_string(); Ok(Column::from_name(name)) } }) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 02176a506a251..c5538d8880a7f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2740,8 +2740,8 @@ fn calc_func_dependencies_for_aggregate( if !contains_grouping_set(group_expr) { let group_by_expr_names = group_expr .iter() - .map(|item| item.display_name()) - .collect::>>()?; + .map(|item| item.schema_name().to_string()) + .collect::>(); let aggregate_func_dependencies = aggregate_functional_dependencies( input.schema(), &group_by_expr_names, diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 5ba6e3007ead4..34b5909f0a5a3 100644 --- a/datafusion/expr/src/udf.rs +++ 
b/datafusion/expr/src/udf.rs @@ -26,7 +26,7 @@ use arrow::datatypes::DataType; use datafusion_common::{not_impl_err, ExprSchema, Result}; -use crate::expr::create_name; +use crate::expr::schema_name_from_exprs_comma_seperated_without_space; use crate::interval_arithmetic::Interval; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; @@ -154,6 +154,13 @@ impl ScalarUDF { self.inner.display_name(args) } + /// Returns this function's schema_name. + /// + /// See [`ScalarUDFImpl::schema_name`] for more details + pub fn schema_name(&self, args: &[Expr]) -> Result { + self.inner.schema_name(args) + } + /// Returns the aliases for this function. /// /// See [`ScalarUDF::with_aliases`] for more details @@ -345,12 +352,23 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { fn name(&self) -> &str; /// Returns the user-defined display name of the UDF given the arguments - /// fn display_name(&self, args: &[Expr]) -> Result { - let names: Vec = args.iter().map(create_name).collect::>()?; + let names: Vec = args.iter().map(ToString::to_string).collect(); + // TODO: join with ", " to standardize the formatting of Vec, Ok(format!("{}({})", self.name(), names.join(","))) } + /// Returns the name of the column this expression would create + /// + /// See [`Expr::schema_name`] for details + fn schema_name(&self, args: &[Expr]) -> Result { + Ok(format!( + "{}({})", + self.name(), + schema_name_from_exprs_comma_seperated_without_space(args)? + )) + } + /// Returns the function's [`Signature`] for information about what input /// types are accepted and the function's Volatility. fn signature(&self) -> &Signature; diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 65a70b6732668..c3e4505ed19c7 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -798,7 +798,9 @@ pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result { let (qualifier, field) = plan.schema().qualified_field_from_column(col)?; Ok(Expr::from(Column::from((qualifier, field)))) } - _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))), + _ => Ok(Expr::Column(Column::from_name( + expr.schema_name().to_string(), + ))), } } diff --git a/datafusion/functions-nested/src/expr_ext.rs b/datafusion/functions-nested/src/expr_ext.rs index 3524d62d0bc42..4da4a3f583b7c 100644 --- a/datafusion/functions-nested/src/expr_ext.rs +++ b/datafusion/functions-nested/src/expr_ext.rs @@ -38,7 +38,7 @@ use crate::extract::{array_element, array_slice}; /// # use datafusion_functions_nested::expr_ext::IndexAccessor; /// let expr = col("c1") /// .index(lit(3)); -/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[Int32(3)]"); /// ``` pub trait IndexAccessor { fn index(self, key: Expr) -> Expr; @@ -68,7 +68,7 @@ impl IndexAccessor for Expr { /// # use datafusion_functions_nested::expr_ext::SliceAccessor; /// let expr = col("c1") /// .range(lit(2), lit(4)); -/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[Int32(2):Int32(4)]"); /// ``` pub trait SliceAccessor { fn range(self, start: Expr, stop: Expr) -> Expr; diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index af4e36926b687..b9e82f371369a 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -40,7 +40,7 @@ use datafusion_expr::{ColumnarValue, 
ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; -use crate::utils::{get_arg_name, make_scalar_function}; +use crate::utils::make_scalar_function; // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!( @@ -97,11 +97,24 @@ impl ScalarUDFImpl for ArrayElement { } fn display_name(&self, args: &[Expr]) -> Result { - Ok(format!( - "{}[{}]", - get_arg_name(args, 0), - get_arg_name(args, 1) - )) + let args_name = args.iter().map(ToString::to_string).collect::>(); + if args_name.len() != 2 { + return exec_err!("expect 2 args, got {}", args_name.len()); + } + + Ok(format!("{}[{}]", args_name[0], args_name[1])) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + let args_name = args + .iter() + .map(|e| e.schema_name().to_string()) + .collect::>(); + if args_name.len() != 2 { + return exec_err!("expect 2 args, got {}", args_name.len()); + } + + Ok(format!("{}[{}]", args_name[0], args_name[1])) } fn signature(&self) -> &Signature { @@ -254,14 +267,24 @@ impl ScalarUDFImpl for ArraySlice { } fn display_name(&self, args: &[Expr]) -> Result { - Ok(format!( - "{}[{}]", - get_arg_name(args, 0), - (1..args.len()) - .map(|i| get_arg_name(args, i)) - .collect::>() - .join(":") - )) + let args_name = args.iter().map(ToString::to_string).collect::>(); + if let Some((arr, indexes)) = args_name.split_first() { + Ok(format!("{arr}[{}]", indexes.join(":"))) + } else { + exec_err!("no argument") + } + } + + fn schema_name(&self, args: &[Expr]) -> Result { + let args_name = args + .iter() + .map(|e| e.schema_name().to_string()) + .collect::>(); + if let Some((arr, indexes)) = args_name.split_first() { + Ok(format!("{arr}[{}]", indexes.join(":"))) + } else { + exec_err!("no argument") + } } fn name(&self) -> &str { diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index f396c3b22581c..688e1633e5cf6 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -32,7 +32,7 @@ use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; use core::any::type_name; use datafusion_common::DataFusionError; -use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionImplementation}; +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; macro_rules! downcast_arg { ($ARG:expr, $ARRAY_TYPE:ident) => {{ @@ -253,11 +253,6 @@ pub(crate) fn compute_array_dims( } } -/// Returns the name of the argument at index `i`, or an empty string if the index is out of bounds. 
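The recurring change in this patch is swapping the fallible `Expr::display_name()?` for the infallible `Expr::schema_name()`, which yields a displayable name for the expression's output column. A minimal usage sketch, assuming the post-change `datafusion-expr` API (the rendered strings are illustrative):

    use datafusion_expr::{col, lit};

    fn main() {
        // The schema name is what the planner uses for an expression's output
        // field; callers that previously wrote `expr.display_name()?` now
        // write `expr.schema_name().to_string()`.
        let expr = col("a") + lit(1);
        let name = expr.schema_name().to_string(); // e.g. "a + Int32(1)"
        println!("{name}");

        // An alias takes over the schema name, so the projected column keeps
        // the user-facing name.
        let aliased = expr.alias("a_plus_one");
        assert_eq!(aliased.schema_name().to_string(), "a_plus_one");
    }
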
-pub(super) fn get_arg_name(args: &[Expr], i: usize) -> String { - args.get(i).map(ToString::to_string).unwrap_or_default() -} - #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index d80df0f334abe..af05f447f1c1e 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -41,7 +41,7 @@ use super::expr_fn::get_field; /// # use datafusion_functions::core::expr_ext::FieldAccessor; /// let expr = col("c1") /// .field("my_field"); -/// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[my_field]"); /// ``` pub trait FieldAccessor { fn field(self, name: impl Literal) -> Expr; diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 2c2e36b91b13a..a51f895c5084b 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -74,7 +74,27 @@ impl ScalarUDFImpl for GetFieldFunc { } }; - Ok(format!("{}[{}]", args[0].display_name()?, name)) + Ok(format!("{}[{}]", args[0], name)) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + if args.len() != 2 { + return exec_err!( + "get_field function requires 2 arguments, got {}", + args.len() + ); + } + + let name = match &args[1] { + Expr::Literal(name) => name, + _ => { + return exec_err!( + "get_field function requires the argument field_name to be a string" + ); + } + }; + + Ok(format!("{}[{}]", args[0].schema_name(), name)) } fn signature(&self) -> &Signature { diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 2823b0fca2d12..2bb859d84ad79 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1040,9 +1040,7 @@ mod test { let expr = col("a").in_list(vec![lit(1_i32), lit(4_i8), lit(8_i64)], false); let empty = empty_with_type(DataType::Int64); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - let expected = - "Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)]) AS a IN (Map { iter: Iter([Literal(Int32(1)), Literal(Int8(4)), Literal(Int64(8))]) })\ - \n EmptyRelation"; + let expected = "Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)])\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; // a in (1,4,8), a is decimal @@ -1055,9 +1053,7 @@ mod test { )?), })); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - let expected = - "Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))]) AS a IN (Map { iter: Iter([Literal(Int32(1)), Literal(Int8(4)), Literal(Int64(8))]) })\ - \n EmptyRelation"; + let expected = "Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))])\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected) } @@ -1150,8 +1146,7 @@ mod test { let like_expr = Expr::Like(Like::new(false, expr, pattern, None, false)); let empty = empty_with_type(DataType::Utf8); let plan = LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?); - let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL\ - \n EmptyRelation"; + let 
expected = "Projection: a LIKE CAST(NULL AS Utf8)\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; let expr = Box::new(col("a")); @@ -1179,8 +1174,7 @@ mod test { let ilike_expr = Expr::Like(Like::new(false, expr, pattern, None, true)); let empty = empty_with_type(DataType::Utf8); let plan = LogicalPlan::Projection(Projection::try_new(vec![ilike_expr], empty)?); - let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL\ - \n EmptyRelation"; + let expected = "Projection: a ILIKE CAST(NULL AS Utf8)\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; let expr = Box::new(col("a")); diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 9cd9e4dece26a..45e5409ae9ac1 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1108,7 +1108,7 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_> { self.down_index += 1; } - let expr_name = expr.display_name()?; + let expr_name = expr.schema_name().to_string(); let (_, expr_alias) = self.common_exprs.entry(expr_id).or_insert_with(|| { let expr_alias = self.alias_generator.next(CSE_PREFIX); diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index fdd9ef8a8b0b8..16b4e43abcd5a 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -452,7 +452,8 @@ fn agg_exprs_evaluation_result_on_empty_batch( let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; if matches!(result_expr, Expr::Literal(ScalarValue::Int64(_))) { - expr_result_map_for_count_bug.insert(e.display_name()?, result_expr); + expr_result_map_for_count_bug + .insert(e.schema_name().to_string(), result_expr); } } Ok(()) @@ -490,7 +491,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( let expr_name = match expr { Expr::Alias(Alias { name, .. 
}) => name.to_string(), Expr::Column(Column { relation: _, name }) => name.to_string(), - _ => expr.display_name()?, + _ => expr.schema_name().to_string(), }; expr_result_map_for_count_bug.insert(expr_name, result_expr); } @@ -546,8 +547,8 @@ fn filter_exprs_evaluation_result_on_empty_batch( )], else_expr: Some(Box::new(Expr::Literal(ScalarValue::Null))), }); - expr_result_map_for_count_bug - .insert(new_expr.display_name()?, new_expr); + let expr_key = new_expr.schema_name().to_string(); + expr_result_map_for_count_bug.insert(expr_key, new_expr); } None } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index d014b9149aabf..ac4ed87a4a1a2 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -135,8 +135,8 @@ fn optimize_projections( let group_by_expr_existing = aggregate .group_expr .iter() - .map(|group_by_expr| group_by_expr.display_name()) - .collect::>>()?; + .map(|group_by_expr| group_by_expr.schema_name().to_string()) + .collect::>(); let new_group_bys = if let Some(simplest_groupby_indices) = get_required_group_by_exprs_indices( @@ -1928,8 +1928,8 @@ mod tests { WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("test.b")], )); - let col1 = col(max1.display_name()?); - let col2 = col(max2.display_name()?); + let col1 = col(max1.schema_name().to_string()); + let col2 = col(max2.schema_name().to_string()); let plan = LogicalPlanBuilder::from(table_scan) .window(vec![max1])? diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 4254d3464662b..8455919c35a83 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -817,7 +817,7 @@ impl OptimizerRule for PushDownFilter { let group_expr_columns = agg .group_expr .iter() - .map(|e| Ok(Column::from_qualified_name(e.display_name()?))) + .map(|e| Ok(Column::from_qualified_name(e.schema_name().to_string()))) .collect::>>()?; let predicates = split_conjunction_owned(filter.predicate.clone()); @@ -838,7 +838,7 @@ impl OptimizerRule for PushDownFilter { // So we need create a replace_map, add {`a+b` --> Expr(Column(a)+Column(b))} let mut replace_map = HashMap::new(); for expr in &agg.group_expr { - replace_map.insert(expr.display_name()?, expr.clone()); + replace_map.insert(expr.schema_name().to_string(), expr.clone()); } let replaced_push_predicates = push_predicates .into_iter() diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 3c66da21aff65..c79180b79256b 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -188,9 +188,9 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut proj_exprs = vec![]; for expr in projection.expr.iter() { - let old_expr_name = expr.display_name()?; + let old_expr_name = expr.schema_name().to_string(); let new_expr = expr_to_rewrite_expr_map.get(expr).unwrap(); - let new_expr_name = new_expr.display_name()?; + let new_expr_name = new_expr.schema_name().to_string(); if new_expr_name != old_expr_name { proj_exprs.push(new_expr.clone().alias(old_expr_name)) } else { diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 5b43957924474..30cae17eaf9f8 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ 
b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -194,7 +194,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { } let arg = args.swap_remove(0); - if group_fields_set.insert(arg.display_name()?) { + if group_fields_set.insert(arg.schema_name().to_string()) { inner_group_exprs .push(arg.alias(SINGLE_DISTINCT_ALIAS)); } diff --git a/datafusion/proto/tests/cases/serialize.rs b/datafusion/proto/tests/cases/serialize.rs index cc683e778ebca..f28098d83b970 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ -276,7 +276,7 @@ fn test_expression_serialization_roundtrip() { /// Extracts the first part of a function name /// 'foo(bar)' -> 'foo' fn extract_function_name(expr: &Expr) -> String { - let name = expr.display_name().unwrap(); + let name = expr.schema_name().to_string(); name.split('(').next().unwrap().to_string() } } diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 71f64f1cf459e..c1b3fe18f7e70 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -115,7 +115,7 @@ pub(crate) fn unproject_window_exprs(expr: &Expr, windows: &[&Window]) -> Result if let Some(unproj) = windows .iter() .flat_map(|w| w.window_expr.iter()) - .find(|window_expr| window_expr.display_name().unwrap() == c.name) + .find(|window_expr| window_expr.schema_name().to_string() == c.name) { Ok(Transformed::yes(unproj.clone())) } else { diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 3b044646e6cb2..5cdc546e02675 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -329,7 +329,7 @@ pub(crate) fn transform_bottom_unnest( // Full context, we are trying to plan the execution as InnerProjection->Unnest->OuterProjection // inside unnest execution, each column inside the inner projection // will be transformed into new columns. 
Thus we need to keep track of these placeholding column names - let placeholder_name = unnest_expr.display_name()?; + let placeholder_name = unnest_expr.schema_name().to_string(); unnest_placeholder_columns.push(placeholder_name.clone()); // Add alias for the argument expression, to avoid naming conflicts @@ -402,7 +402,7 @@ pub(crate) fn transform_bottom_unnest( } else { // We need to evaluate the expr in the inner projection, // outer projection just select its name - let column_name = transformed_expr.display_name()?; + let column_name = transformed_expr.schema_name().to_string(); inner_projection_exprs.push(transformed_expr); Ok(vec![Expr::Column(Column::from_name(column_name))]) } @@ -469,16 +469,16 @@ mod tests { assert_eq!( transformed_exprs, vec![ - col("unnest(struct_col).field1"), - col("unnest(struct_col).field2"), + col("UNNEST(struct_col).field1"), + col("UNNEST(struct_col).field2"), ] ); - assert_eq!(unnest_placeholder_columns, vec!["unnest(struct_col)"]); + assert_eq!(unnest_placeholder_columns, vec!["UNNEST(struct_col)"]); // still reference struct_col in original schema but with alias, // to avoid colliding with the projection on the column itself if any assert_eq!( inner_projection_exprs, - vec![col("struct_col").alias("unnest(struct_col)"),] + vec![col("struct_col").alias("UNNEST(struct_col)"),] ); // unnest(array_col) + 1 @@ -491,12 +491,12 @@ mod tests { )?; assert_eq!( unnest_placeholder_columns, - vec!["unnest(struct_col)", "unnest(array_col)"] + vec!["UNNEST(struct_col)", "UNNEST(array_col)"] ); // only transform the unnest children assert_eq!( transformed_exprs, - vec![col("unnest(array_col)").add(lit(1i64))] + vec![col("UNNEST(array_col)").add(lit(1i64))] ); // keep appending to the current vector @@ -505,8 +505,8 @@ mod tests { assert_eq!( inner_projection_exprs, vec![ - col("struct_col").alias("unnest(struct_col)"), - col("array_col").alias("unnest(array_col)") + col("struct_col").alias("UNNEST(struct_col)"), + col("array_col").alias("UNNEST(array_col)") ] ); @@ -553,17 +553,17 @@ mod tests { // Only the inner most/ bottom most unnest is transformed assert_eq!( transformed_exprs, - vec![unnest(col("unnest(struct_col[matrix])"))] + vec![unnest(col("UNNEST(struct_col[matrix])"))] ); assert_eq!( unnest_placeholder_columns, - vec!["unnest(struct_col[matrix])"] + vec!["UNNEST(struct_col[matrix])"] ); assert_eq!( inner_projection_exprs, vec![col("struct_col") .field("matrix") - .alias("unnest(struct_col[matrix])"),] + .alias("UNNEST(struct_col[matrix])"),] ); Ok(()) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 9bbdbe8dbfc91..179fc108e6d20 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -447,10 +447,10 @@ fn test_unnest_logical_plan() -> Result<()> { let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); - let expected = "Projection: unnest(unnest_table.struct_col).field1, unnest(unnest_table.struct_col).field2, unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ - \n Unnest: lists[unnest(unnest_table.array_col)] structs[unnest(unnest_table.struct_col)]\ - \n Projection: unnest_table.struct_col AS unnest(unnest_table.struct_col), unnest_table.array_col AS unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ - \n TableScan: unnest_table"; + let expected = "Projection: UNNEST(unnest_table.struct_col).field1, UNNEST(unnest_table.struct_col).field2, 
UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ + \n Unnest: lists[UNNEST(unnest_table.array_col)] structs[UNNEST(unnest_table.struct_col)]\ + \n Projection: unnest_table.struct_col AS UNNEST(unnest_table.struct_col), unnest_table.array_col AS UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ + \n TableScan: unnest_table"; assert_eq!(format!("{plan}"), expected); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b71bc765ba376..2b8b9429cce2e 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1137,7 +1137,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: Error during planning: array_element does not support zero arguments. +query error DataFusion error: Error during planning: Error during planning: array_element does not support zero arguments select array_element(); # array_element error @@ -1979,7 +1979,7 @@ select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), [6.0] [6.0] [] [] # Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: Error during planning: array_slice does not support zero arguments. +query error DataFusion error: Error during planning: Error during planning: array_slice does not support zero arguments select array_slice(); diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index 3ca187ddee84e..2d74c1fc69945 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -36,9 +36,9 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2 -02)--Unnest: lists[unnest(v.column2)] structs[] -03)----Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2 +02)--Unnest: lists[UNNEST(v.column2)] structs[] +03)----Projection: v.column2 AS UNNEST(v.column2), v.column1 04)------Filter: v.column1 = Int64(2) 05)--------TableScan: v projection=[column1, column2] @@ -53,11 +53,11 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2 -02)--Filter: unnest(v.column2) > Int64(3) -03)----Projection: unnest(v.column2) -04)------Unnest: lists[unnest(v.column2)] structs[] -05)--------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2 +02)--Filter: UNNEST(v.column2) > Int64(3) +03)----Projection: UNNEST(v.column2) +04)------Unnest: lists[UNNEST(v.column2)] structs[] +05)--------Projection: v.column2 AS UNNEST(v.column2), v.column1 06)----------TableScan: v projection=[column1, column2] query II @@ -71,10 +71,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 AND column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2, v.column1 -02)--Filter: unnest(v.column2) > Int64(3) -03)----Unnest: lists[unnest(v.column2)] structs[] -04)------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2, v.column1 +02)--Filter: UNNEST(v.column2) > Int64(3) 
+03)----Unnest: lists[UNNEST(v.column2)] structs[] +04)------Projection: v.column2 AS UNNEST(v.column2), v.column1 05)--------Filter: v.column1 = Int64(2) 06)----------TableScan: v projection=[column1, column2] @@ -90,10 +90,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 OR column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2, v.column1 -02)--Filter: unnest(v.column2) > Int64(3) OR v.column1 = Int64(2) -03)----Unnest: lists[unnest(v.column2)] structs[] -04)------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2, v.column1 +02)--Filter: UNNEST(v.column2) > Int64(3) OR v.column1 = Int64(2) +03)----Unnest: lists[UNNEST(v.column2)] structs[] +04)------Projection: v.column2 AS UNNEST(v.column2), v.column1 05)--------TableScan: v projection=[column1, column2] statement ok @@ -112,10 +112,10 @@ query TT explain select * from (select column1, unnest(column2) as o from d) where o['a'] = 1; ---- logical_plan -01)Projection: d.column1, unnest(d.column2) AS o -02)--Filter: get_field(unnest(d.column2), Utf8("a")) = Int64(1) -03)----Unnest: lists[unnest(d.column2)] structs[] -04)------Projection: d.column1, d.column2 AS unnest(d.column2) +01)Projection: d.column1, UNNEST(d.column2) AS o +02)--Filter: get_field(UNNEST(d.column2), Utf8("a")) = Int64(1) +03)----Unnest: lists[UNNEST(d.column2)] structs[] +04)------Projection: d.column1, d.column2 AS UNNEST(d.column2) 05)--------TableScan: d projection=[column1, column2] diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index d818c0e927953..4957011b8ba23 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -539,21 +539,21 @@ query TT explain select unnest(unnest(unnest(column3)['c1'])), column3 from recursive_unnest_table; ---- logical_plan -01)Unnest: lists[unnest(unnest(unnest(recursive_unnest_table.column3)[c1]))] structs[] -02)--Projection: unnest(unnest(recursive_unnest_table.column3)[c1]) AS unnest(unnest(unnest(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 -03)----Unnest: lists[unnest(unnest(recursive_unnest_table.column3)[c1])] structs[] -04)------Projection: get_field(unnest(recursive_unnest_table.column3), Utf8("c1")) AS unnest(unnest(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 -05)--------Unnest: lists[unnest(recursive_unnest_table.column3)] structs[] -06)----------Projection: recursive_unnest_table.column3 AS unnest(recursive_unnest_table.column3), recursive_unnest_table.column3 +01)Unnest: lists[UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1]))] structs[] +02)--Projection: UNNEST(UNNEST(recursive_unnest_table.column3)[c1]) AS UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 +03)----Unnest: lists[UNNEST(UNNEST(recursive_unnest_table.column3)[c1])] structs[] +04)------Projection: get_field(UNNEST(recursive_unnest_table.column3), Utf8("c1")) AS UNNEST(UNNEST(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 +05)--------Unnest: lists[UNNEST(recursive_unnest_table.column3)] structs[] +06)----------Projection: recursive_unnest_table.column3 AS UNNEST(recursive_unnest_table.column3), recursive_unnest_table.column3 07)------------TableScan: recursive_unnest_table projection=[column3] physical_plan 01)UnnestExec -02)--ProjectionExec: expr=[unnest(unnest(recursive_unnest_table.column3)[c1])@0 as 
unnest(unnest(unnest(recursive_unnest_table.column3)[c1])), column3@1 as column3] +02)--ProjectionExec: expr=[UNNEST(UNNEST(recursive_unnest_table.column3)[c1])@0 as UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), column3@1 as column3] 03)----UnnestExec -04)------ProjectionExec: expr=[get_field(unnest(recursive_unnest_table.column3)@0, c1) as unnest(unnest(recursive_unnest_table.column3)[c1]), column3@1 as column3] +04)------ProjectionExec: expr=[get_field(UNNEST(recursive_unnest_table.column3)@0, c1) as UNNEST(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------UnnestExec -07)------------ProjectionExec: expr=[column3@0 as unnest(recursive_unnest_table.column3), column3@0 as column3] +07)------------ProjectionExec: expr=[column3@0 as UNNEST(recursive_unnest_table.column3), column3@0 as column3] 08)--------------MemoryExec: partitions=1, partition_sizes=[1] ## group by unnest diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 89f2efec66aa8..f2756bb06d1eb 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -417,7 +417,7 @@ pub async fn from_substrait_rel( } // Ensure the expression has a unique display name, so that project's // validate_unique_names doesn't fail - let name = x.display_name()?; + let name = x.schema_name().to_string(); let mut new_name = name.clone(); let mut i = 0; while names.contains(&new_name) { diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 360377c231a36..0a86d27e013c0 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -358,8 +358,8 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; let plan_str = format!("{}", plan); - assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ - \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ + assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE Utf8(\"PROMO%\") THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ + \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE Utf8(\"PROMO%\") THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ \n Projection: CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN 
FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount) ELSE Decimal128(Some(0),19,0) END, FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ \n Filter: FILENAME_PLACEHOLDER_0.l_partkey = FILENAME_PLACEHOLDER_1.p_partkey AND FILENAME_PLACEHOLDER_0.l_shipdate >= Date32(\"1995-09-01\") AND FILENAME_PLACEHOLDER_0.l_shipdate < CAST(Utf8(\"1995-10-01\") AS Date32)\ \n Inner Join: Filter: Boolean(true)\ From b5d7931dea01773b8d79b8824ce68dfc502cad7b Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 9 Aug 2024 15:42:51 +0200 Subject: [PATCH 258/357] Make `CommonSubexprEliminate` top-down like (#11683) * Make `CommonSubexprEliminate` top-down like * fix top-down recursion, fix unit tests to use real a Optimizer to verify behavior on plans * Extract result of `find_common_exprs` into a struct (#4) * Extract the result of find_common_exprs into a struct * Make naming consistent --------- Co-authored-by: Andrew Lamb --- .../optimizer/src/common_subexpr_eliminate.rs | 785 ++++++++++-------- 1 file changed, 417 insertions(+), 368 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 45e5409ae9ac1..feccf5679efbc 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -28,11 +28,10 @@ use crate::utils::NamePreserver; use datafusion_common::alias::AliasGenerator; use datafusion_common::hash_utils::combine_hashes; use datafusion_common::tree_node::{ - Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, -}; -use datafusion_common::{ - internal_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef, Result, + Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, + TreeNodeVisitor, }; +use datafusion_common::{qualified_name, Column, DFSchema, DFSchemaRef, Result}; use datafusion_expr::expr::{Alias, ScalarFunction}; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ @@ -144,6 +143,23 @@ pub struct CommonSubexprEliminate { random_state: RandomState, } +/// The result of potentially rewriting a list of expressions to eliminate common +/// subexpressions. 
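Because the rule now drives its own recursion, the reworked unit tests exercise it through a full `Optimizer` rather than calling `rewrite` directly. A minimal sketch of that pattern, assuming the public `datafusion-optimizer` API (the helper name is illustrative):

    use std::sync::Arc;
    use datafusion_common::Result;
    use datafusion_expr::LogicalPlan;
    use datafusion_optimizer::common_subexpr_eliminate::CommonSubexprEliminate;
    use datafusion_optimizer::optimizer::{Optimizer, OptimizerContext};

    // Run only the CSE rule over a plan, the way the updated tests do.
    fn eliminate_common_subexprs(plan: LogicalPlan) -> Result<LogicalPlan> {
        let optimizer =
            Optimizer::with_rules(vec![Arc::new(CommonSubexprEliminate::new())]);
        let config = OptimizerContext::new();
        // The observer callback sees each intermediate plan; a no-op is fine here.
        optimizer.optimize(plan, &config, |_plan, _rule| ())
    }
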
+#[derive(Debug)] +enum FoundCommonExprs { + /// No common expressions were found + No { original_exprs_list: Vec> }, + /// Common expressions were found + Yes { + /// extracted common expressions + common_exprs: Vec<(Expr, String)>, + /// new expressions with common subexpressions replaced + new_exprs_list: Vec>, + /// original expressions + original_exprs_list: Vec>, + }, +} + impl CommonSubexprEliminate { pub fn new() -> Self { Self { @@ -217,8 +233,7 @@ impl CommonSubexprEliminate { expr_stats: &ExprStats<'n>, common_exprs: &mut CommonExprs<'n>, alias_generator: &AliasGenerator, - ) -> Result>>> { - let mut transformed = false; + ) -> Result>> { exprs_list .into_iter() .zip(arrays_list.iter()) @@ -227,69 +242,65 @@ impl CommonSubexprEliminate { .into_iter() .zip(arrays.iter()) .map(|(expr, id_array)| { - let replaced = replace_common_expr( + replace_common_expr( expr, id_array, expr_stats, common_exprs, alias_generator, - )?; - // remember if this expression was actually replaced - transformed |= replaced.transformed; - Ok(replaced.data) + ) }) .collect::>>() }) .collect::>>() - .map(|rewritten_exprs_list| { - // propagate back transformed information - Transformed::new_transformed(rewritten_exprs_list, transformed) - }) } - /// Rewrites the expression in `exprs_list` with common sub-expressions - /// replaced with a new column and adds a ProjectionExec on top of `input` - /// which computes any replaced common sub-expressions. + /// Extracts common sub-expressions and rewrites `exprs_list`. /// - /// Returns a tuple of: - /// 1. The rewritten expressions - /// 2. A `LogicalPlan::Projection` with input of `input` that computes any - /// common sub-expressions that were used - fn rewrite_expr( + /// Returns `FoundCommonExprs` recording the result of the extraction + fn find_common_exprs( &self, exprs_list: Vec>, - arrays_list: Vec>, - input: LogicalPlan, - expr_stats: &ExprStats, config: &dyn OptimizerConfig, - ) -> Result>, LogicalPlan)>> { - let mut transformed = false; - let mut common_exprs = CommonExprs::new(); - - let rewrite_exprs = self.rewrite_exprs_list( - exprs_list, - arrays_list, - expr_stats, - &mut common_exprs, - &config.alias_generator(), - )?; - transformed |= rewrite_exprs.transformed; + expr_mask: ExprMask, + ) -> Result> { + let mut found_common = false; + let mut expr_stats = ExprStats::new(); + let id_arrays_list = exprs_list + .iter() + .map(|exprs| { + self.to_arrays(exprs, &mut expr_stats, expr_mask).map( + |(fc, id_arrays)| { + found_common |= fc; - let new_input = self.rewrite(input, config)?; - transformed |= new_input.transformed; - let mut new_input = new_input.data; + id_arrays + }, + ) + }) + .collect::>>()?; + if found_common { + let mut common_exprs = CommonExprs::new(); + let new_exprs_list = self.rewrite_exprs_list( + // Must clone as Identifiers use references to original expressions so we have + // to keep the original expressions intact. 
+ exprs_list.clone(), + id_arrays_list, + &expr_stats, + &mut common_exprs, + &config.alias_generator(), + )?; + assert!(!common_exprs.is_empty()); - if !common_exprs.is_empty() { - assert!(transformed); - new_input = build_common_expr_project_plan(new_input, common_exprs)?; + Ok(Transformed::yes(FoundCommonExprs::Yes { + common_exprs: common_exprs.into_values().collect(), + new_exprs_list, + original_exprs_list: exprs_list, + })) + } else { + Ok(Transformed::no(FoundCommonExprs::No { + original_exprs_list: exprs_list, + })) } - - // return the transformed information - - Ok(Transformed::new_transformed( - (rewrite_exprs.data, new_input), - transformed, - )) } fn try_optimize_proj( @@ -353,96 +364,86 @@ impl CommonSubexprEliminate { window: Window, config: &dyn OptimizerConfig, ) -> Result> { - // collect all window expressions from any number of LogicalPlanWindow - let (mut window_exprs, mut window_schemas, mut plan) = + // Collects window expressions from consecutive `LogicalPlan::Window` nodes into + // a list. + let (window_expr_list, window_schemas, input) = get_consecutive_window_exprs(window); - let mut found_common = false; - let mut expr_stats = ExprStats::new(); - let arrays_per_window = window_exprs - .iter() - .map(|window_expr| { - self.to_arrays(window_expr, &mut expr_stats, ExprMask::Normal) - .map(|(fc, id_arrays)| { - found_common |= fc; - - id_arrays + // Extract common sub-expressions from the list. + self.find_common_exprs(window_expr_list, config, ExprMask::Normal)? + .map_data(|common| match common { + // If there are common sub-expressions, then the insert a projection node + // with the common expressions between the new window nodes and the + // original input. + FoundCommonExprs::Yes { + common_exprs, + new_exprs_list, + original_exprs_list, + } => { + build_common_expr_project_plan(input, common_exprs).map(|new_input| { + (new_exprs_list, new_input, Some(original_exprs_list)) }) - }) - .collect::>>()?; - - if found_common { - // save the original names - let name_preserver = NamePreserver::new(&plan); - let mut saved_names = window_exprs - .iter() - .map(|exprs| { - exprs - .iter() - .map(|expr| name_preserver.save(expr)) - .collect::>>() + } + FoundCommonExprs::No { + original_exprs_list, + } => Ok((original_exprs_list, input, None)), + })? + // Recurse into the new input. + // (This is similar to what a `ApplyOrder::TopDown` optimizer rule would do.) + .transform_data(|(new_window_expr_list, new_input, window_expr_list)| { + self.rewrite(new_input, config)?.map_data(|new_input| { + Ok((new_window_expr_list, new_input, window_expr_list)) }) - .collect::>>()?; - - assert_eq!(window_exprs.len(), arrays_per_window.len()); - let num_window_exprs = window_exprs.len(); - let rewritten_window_exprs = self.rewrite_expr( - // Must clone as Identifiers use references to original expressions so we - // have to keep the original expressions intact. - window_exprs.clone(), - arrays_per_window, - plan, - &expr_stats, - config, - )?; - let transformed = rewritten_window_exprs.transformed; - assert!(transformed); - - let (mut new_expr, new_input) = rewritten_window_exprs.data; - - let mut plan = new_input; - - // Construct consecutive window operator, with their corresponding new - // window expressions. 
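The removed pop-in-reverse loop is replaced above by folding the collected per-window lists back with `try_rfold`, rebuilding the consecutive `Window` nodes from the innermost one outwards. A stripped-down sketch of that folding pattern in plain Rust (names and types here are illustrative, not from the codebase):

    // Fold per-level expression lists back into a nested plan-like string,
    // last level first, mirroring how the consecutive Window nodes are rebuilt.
    fn rebuild(levels: Vec<Vec<String>>, input: String) -> Result<String, String> {
        levels.into_iter().try_rfold(input, |child, exprs| {
            Ok(format!("Window[{}]({child})", exprs.join(", ")))
        })
    }

    fn main() {
        let plan = rebuild(
            vec![vec!["outer_expr".into()], vec!["inner_expr".into()]],
            "TableScan".into(),
        )
        .unwrap();
        assert_eq!(plan, "Window[outer_expr](Window[inner_expr](TableScan))");
    }
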
- // - // Note this iterates over, `new_expr` and `saved_names` which are the - // same length, in reverse order - assert_eq!(num_window_exprs, new_expr.len()); - assert_eq!(num_window_exprs, saved_names.len()); - while let (Some(new_window_expr), Some(saved_names)) = - (new_expr.pop(), saved_names.pop()) - { - assert_eq!(new_window_expr.len(), saved_names.len()); - - // Rename re-written window expressions with original name, to - // preserve the output schema - let new_window_expr = new_window_expr - .into_iter() - .zip(saved_names.into_iter()) - .map(|(new_window_expr, saved_name)| { - saved_name.restore(new_window_expr) - }) - .collect::>>()?; - plan = LogicalPlan::Window(Window::try_new( - new_window_expr, - Arc::new(plan), - )?); - } - - Ok(Transformed::new_transformed(plan, transformed)) - } else { - while let (Some(window_expr), Some(schema)) = - (window_exprs.pop(), window_schemas.pop()) - { - plan = LogicalPlan::Window(Window { - input: Arc::new(plan), - window_expr, - schema, - }); - } - - Ok(Transformed::no(plan)) - } + })? + // Rebuild the consecutive window nodes. + .map_data(|(new_window_expr_list, new_input, window_expr_list)| { + // If there were common expressions extracted, then we need to make sure + // we restore the original column names. + // TODO: Although `find_common_exprs()` inserts aliases around extracted + // common expressions this doesn't mean that the original column names + // (schema) are preserved due to the inserted aliases are not always at + // the top of the expression. + // Let's consider improving `find_common_exprs()` to always keep column + // names and get rid of additional name preserving logic here. + if let Some(window_expr_list) = window_expr_list { + let name_preserver = NamePreserver::new_for_projection(); + let saved_names = window_expr_list + .iter() + .map(|exprs| { + exprs + .iter() + .map(|expr| name_preserver.save(expr)) + .collect::>>() + }) + .collect::>>()?; + new_window_expr_list.into_iter().zip(saved_names).try_rfold( + new_input, + |plan, (new_window_expr, saved_names)| { + let new_window_expr = new_window_expr + .into_iter() + .zip(saved_names) + .map(|(new_window_expr, saved_name)| { + saved_name.restore(new_window_expr) + }) + .collect::>>()?; + Window::try_new(new_window_expr, Arc::new(plan)) + .map(LogicalPlan::Window) + }, + ) + } else { + new_window_expr_list + .into_iter() + .zip(window_schemas) + .try_rfold(new_input, |plan, (new_window_expr, schema)| { + Window::try_new_with_schema( + new_window_expr, + Arc::new(plan), + schema, + ) + .map(LogicalPlan::Window) + }) + } + }) } fn try_optimize_aggregate( @@ -454,136 +455,184 @@ impl CommonSubexprEliminate { group_expr, aggr_expr, input, - schema: orig_schema, + schema, .. 
} = aggregate; - // track transformed information - let mut transformed = false; - - let name_perserver = NamePreserver::new_for_projection(); - let saved_names = aggr_expr - .iter() - .map(|expr| name_perserver.save(expr)) - .collect::>>()?; - - let mut expr_stats = ExprStats::new(); - // rewrite inputs - let (group_found_common, group_arrays) = - self.to_arrays(&group_expr, &mut expr_stats, ExprMask::Normal)?; - let (aggr_found_common, aggr_arrays) = - self.to_arrays(&aggr_expr, &mut expr_stats, ExprMask::Normal)?; - let (new_aggr_expr, new_group_expr, new_input) = - if group_found_common || aggr_found_common { - // rewrite both group exprs and aggr_expr - let rewritten = self.rewrite_expr( - // Must clone as Identifiers use references to original expressions so - // we have to keep the original expressions intact. - vec![group_expr.clone(), aggr_expr.clone()], - vec![group_arrays, aggr_arrays], - unwrap_arc(input), - &expr_stats, - config, - )?; - assert!(rewritten.transformed); - transformed |= rewritten.transformed; - let (mut new_expr, new_input) = rewritten.data; - - // note the reversed pop order. - let new_aggr_expr = pop_expr(&mut new_expr)?; - let new_group_expr = pop_expr(&mut new_expr)?; - - (new_aggr_expr, new_group_expr, Arc::new(new_input)) - } else { - (aggr_expr, group_expr, input) - }; + let input = unwrap_arc(input); + // Extract common sub-expressions from the aggregate and grouping expressions. + self.find_common_exprs(vec![group_expr, aggr_expr], config, ExprMask::Normal)? + .map_data(|common| { + match common { + // If there are common sub-expressions, then insert a projection node + // with the common expressions between the new aggregate node and the + // original input. + FoundCommonExprs::Yes { + common_exprs, + mut new_exprs_list, + mut original_exprs_list, + } => { + let new_aggr_expr = new_exprs_list.pop().unwrap(); + let new_group_expr = new_exprs_list.pop().unwrap(); + + build_common_expr_project_plan(input, common_exprs).map( + |new_input| { + let aggr_expr = original_exprs_list.pop().unwrap(); + ( + new_aggr_expr, + new_group_expr, + new_input, + Some(aggr_expr), + ) + }, + ) + } - // create potential projection on top - let mut expr_stats = ExprStats::new(); - let (aggr_found_common, aggr_arrays) = self.to_arrays( - &new_aggr_expr, - &mut expr_stats, - ExprMask::NormalAndAggregates, - )?; - if aggr_found_common { - let mut common_exprs = CommonExprs::new(); - let mut rewritten_exprs = self.rewrite_exprs_list( - // Must clone as Identifiers use references to original expressions so we - // have to keep the original expressions intact. - vec![new_aggr_expr.clone()], - vec![aggr_arrays], - &expr_stats, - &mut common_exprs, - &config.alias_generator(), - )?; - assert!(rewritten_exprs.transformed); - let rewritten = pop_expr(&mut rewritten_exprs.data)?; + FoundCommonExprs::No { + mut original_exprs_list, + } => { + let new_aggr_expr = original_exprs_list.pop().unwrap(); + let new_group_expr = original_exprs_list.pop().unwrap(); - assert!(!common_exprs.is_empty()); - let mut agg_exprs = common_exprs - .into_values() - .map(|(expr, expr_alias)| expr.alias(expr_alias)) - .collect::>(); - - let new_input_schema = Arc::clone(new_input.schema()); - let mut proj_exprs = vec![]; - for expr in &new_group_expr { - extract_expressions(expr, &new_input_schema, &mut proj_exprs)? - } - for (expr_rewritten, expr_orig) in rewritten.into_iter().zip(new_aggr_expr) { - if expr_rewritten == expr_orig { - if let Expr::Alias(Alias { expr, name, .. 
}) = expr_rewritten { - agg_exprs.push(expr.alias(&name)); - proj_exprs.push(Expr::Column(Column::from_name(name))); - } else { - let expr_alias = config.alias_generator().next(CSE_PREFIX); - let (qualifier, field) = - expr_rewritten.to_field(&new_input_schema)?; - let out_name = qualified_name(qualifier.as_ref(), field.name()); - - agg_exprs.push(expr_rewritten.alias(&expr_alias)); - proj_exprs.push( - Expr::Column(Column::from_name(expr_alias)).alias(out_name), - ); + Ok((new_aggr_expr, new_group_expr, input, None)) } - } else { - proj_exprs.push(expr_rewritten); } - } - - let agg = LogicalPlan::Aggregate(Aggregate::try_new( - new_input, - new_group_expr, - agg_exprs, - )?); - - Projection::try_new(proj_exprs, Arc::new(agg)) - .map(LogicalPlan::Projection) - .map(Transformed::yes) - } else { - // TODO: How exactly can the name or the schema change in this case? - // In theory `new_aggr_expr` and `new_group_expr` are either the original expressions or they were crafted via `rewrite_expr()`, that keeps the original expression names. - // If this is really needed can we have UT for it? - // Alias aggregation expressions if they have changed - let new_aggr_expr = new_aggr_expr - .into_iter() - .zip(saved_names.into_iter()) - .map(|(new_expr, saved_name)| saved_name.restore(new_expr)) - .collect::>>()?; - // Since group_expr may have changed, schema may also. Use try_new method. - let new_agg = if transformed { - Aggregate::try_new(new_input, new_group_expr, new_aggr_expr)? - } else { - Aggregate::try_new_with_schema( - new_input, - new_group_expr, - new_aggr_expr, - orig_schema, + })? + // Recurse into the new input. + // (This is similar to what a `ApplyOrder::TopDown` optimizer rule would do.) + .transform_data(|(new_aggr_expr, new_group_expr, new_input, aggr_expr)| { + self.rewrite(new_input, config)?.map_data(|new_input| { + Ok(( + new_aggr_expr, + new_group_expr, + aggr_expr, + Arc::new(new_input), + )) + }) + })? + // Try extracting common aggregate expressions and rebuild the aggregate node. + .transform_data(|(new_aggr_expr, new_group_expr, aggr_expr, new_input)| { + // Extract common aggregate sub-expressions from the aggregate expressions. + self.find_common_exprs( + vec![new_aggr_expr], + config, + ExprMask::NormalAndAggregates, )? - }; - let new_agg = LogicalPlan::Aggregate(new_agg); - - Ok(Transformed::new_transformed(new_agg, transformed)) - } + .map_data(|common| { + match common { + FoundCommonExprs::Yes { + common_exprs, + mut new_exprs_list, + mut original_exprs_list, + } => { + let rewritten_aggr_expr = new_exprs_list.pop().unwrap(); + let new_aggr_expr = original_exprs_list.pop().unwrap(); + + let mut agg_exprs = common_exprs + .into_iter() + .map(|(expr, expr_alias)| expr.alias(expr_alias)) + .collect::>(); + + let new_input_schema = Arc::clone(new_input.schema()); + let mut proj_exprs = vec![]; + for expr in &new_group_expr { + extract_expressions( + expr, + &new_input_schema, + &mut proj_exprs, + )? + } + for (expr_rewritten, expr_orig) in + rewritten_aggr_expr.into_iter().zip(new_aggr_expr) + { + if expr_rewritten == expr_orig { + if let Expr::Alias(Alias { expr, name, .. 
}) = + expr_rewritten + { + agg_exprs.push(expr.alias(&name)); + proj_exprs + .push(Expr::Column(Column::from_name(name))); + } else { + let expr_alias = + config.alias_generator().next(CSE_PREFIX); + let (qualifier, field) = + expr_rewritten.to_field(&new_input_schema)?; + let out_name = qualified_name( + qualifier.as_ref(), + field.name(), + ); + + agg_exprs.push(expr_rewritten.alias(&expr_alias)); + proj_exprs.push( + Expr::Column(Column::from_name(expr_alias)) + .alias(out_name), + ); + } + } else { + proj_exprs.push(expr_rewritten); + } + } + + let agg = LogicalPlan::Aggregate(Aggregate::try_new( + new_input, + new_group_expr, + agg_exprs, + )?); + Projection::try_new(proj_exprs, Arc::new(agg)) + .map(LogicalPlan::Projection) + } + + // If there aren't any common aggregate sub-expressions, then just + // rebuild the aggregate node. + FoundCommonExprs::No { + mut original_exprs_list, + } => { + let rewritten_aggr_expr = original_exprs_list.pop().unwrap(); + + // If there were common expressions extracted, then we need to + // make sure we restore the original column names. + // TODO: Although `find_common_exprs()` inserts aliases around + // extracted common expressions this doesn't mean that the + // original column names (schema) are preserved due to the + // inserted aliases are not always at the top of the + // expression. + // Let's consider improving `find_common_exprs()` to always + // keep column names and get rid of additional name + // preserving logic here. + if let Some(aggr_expr) = aggr_expr { + let name_perserver = NamePreserver::new_for_projection(); + let saved_names = aggr_expr + .iter() + .map(|expr| name_perserver.save(expr)) + .collect::>>()?; + let new_aggr_expr = rewritten_aggr_expr + .into_iter() + .zip(saved_names.into_iter()) + .map(|(new_expr, saved_name)| { + saved_name.restore(new_expr) + }) + .collect::>>()?; + + // Since `group_expr` may have changed, schema may also. + // Use `try_new()` method. + Aggregate::try_new( + new_input, + new_group_expr, + new_aggr_expr, + ) + .map(LogicalPlan::Aggregate) + } else { + Aggregate::try_new_with_schema( + new_input, + new_group_expr, + rewritten_aggr_expr, + schema, + ) + .map(LogicalPlan::Aggregate) + } + } + } + }) + }) } /// Rewrites the expr list and input to remove common subexpressions @@ -602,32 +651,35 @@ impl CommonSubexprEliminate { /// that computes the common subexpressions fn try_unary_plan( &self, - expr: Vec, + exprs: Vec, input: LogicalPlan, config: &dyn OptimizerConfig, ) -> Result, LogicalPlan)>> { - let mut expr_stats = ExprStats::new(); - let (found_common, id_arrays) = - self.to_arrays(&expr, &mut expr_stats, ExprMask::Normal)?; - - if found_common { - let rewritten = self.rewrite_expr( - // Must clone as Identifiers use references to original expressions so we - // have to keep the original expressions intact. - vec![expr.clone()], - vec![id_arrays], - input, - &expr_stats, - config, - )?; - assert!(rewritten.transformed); - rewritten.map_data(|(mut new_expr, new_input)| { - assert_eq!(new_expr.len(), 1); - Ok((new_expr.pop().unwrap(), new_input)) + // Extract common sub-expressions from the expressions. + self.find_common_exprs(vec![exprs], config, ExprMask::Normal)? 
+ .map_data(|common| match common { + FoundCommonExprs::Yes { + common_exprs, + mut new_exprs_list, + original_exprs_list: _, + } => { + let new_exprs = new_exprs_list.pop().unwrap(); + build_common_expr_project_plan(input, common_exprs) + .map(|new_input| (new_exprs, new_input)) + } + FoundCommonExprs::No { + mut original_exprs_list, + } => { + let new_exprs = original_exprs_list.pop().unwrap(); + Ok((new_exprs, input)) + } + })? + // Recurse into the new input. + // (This is similar to what a `ApplyOrder::TopDown` optimizer rule would do.) + .transform_data(|(new_exprs, new_input)| { + self.rewrite(new_input, config)? + .map_data(|new_input| Ok((new_exprs, new_input))) }) - } else { - Ok(Transformed::no((expr, input))) - } } } @@ -665,7 +717,7 @@ impl CommonSubexprEliminate { fn get_consecutive_window_exprs( window: Window, ) -> (Vec>, Vec, LogicalPlan) { - let mut window_exprs = vec![]; + let mut window_expr_list = vec![]; let mut window_schemas = vec![]; let mut plan = LogicalPlan::Window(window); while let LogicalPlan::Window(Window { @@ -674,12 +726,12 @@ fn get_consecutive_window_exprs( schema, }) = plan { - window_exprs.push(window_expr); + window_expr_list.push(window_expr); window_schemas.push(schema); plan = unwrap_arc(input); } - (window_exprs, window_schemas, plan) + (window_expr_list, window_schemas, plan) } impl OptimizerRule for CommonSubexprEliminate { @@ -688,7 +740,10 @@ impl OptimizerRule for CommonSubexprEliminate { } fn apply_order(&self) -> Option { - Some(ApplyOrder::TopDown) + // This rule handles recursion itself in a `ApplyOrder::TopDown` like manner. + // This is because in some cases adjacent nodes are collected (e.g. `Window`) and + // CSEd as a group, which can't be done in a simple `ApplyOrder::TopDown` rule. + None } fn rewrite( @@ -726,8 +781,9 @@ impl OptimizerRule for CommonSubexprEliminate { | LogicalPlan::Unnest(_) | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Prepare(_) => { - // ApplyOrder::TopDown handles recursion - Transformed::no(plan) + // This rule handles recursion itself in a `ApplyOrder::TopDown` like + // manner. + plan.map_children(|c| self.rewrite(c, config))? } }; @@ -753,12 +809,6 @@ impl Default for CommonSubexprEliminate { } } -fn pop_expr(new_expr: &mut Vec>) -> Result> { - new_expr - .pop() - .ok_or_else(|| internal_datafusion_err!("Failed to pop expression")) -} - /// Build the "intermediate" projection plan that evaluates the extracted common /// expressions. 
/// @@ -771,11 +821,11 @@ fn pop_expr(new_expr: &mut Vec>) -> Result> { /// expr_stats: the set of common subexpressions fn build_common_expr_project_plan( input: LogicalPlan, - common_exprs: CommonExprs, + common_exprs: Vec<(Expr, String)>, ) -> Result { let mut fields_set = BTreeSet::new(); let mut project_exprs = common_exprs - .into_values() + .into_iter() .map(|(expr, expr_alias)| { fields_set.insert(expr_alias.clone()); Ok(expr.alias(expr_alias)) @@ -1147,7 +1197,7 @@ fn replace_common_expr<'n>( expr_stats: &ExprStats<'n>, common_exprs: &mut CommonExprs<'n>, alias_generator: &AliasGenerator, -) -> Result> { +) -> Result { if id_array.is_empty() { Ok(Transformed::no(expr)) } else { @@ -1160,6 +1210,7 @@ fn replace_common_expr<'n>( alias_generator, }) } + .data() } #[cfg(test)] @@ -1178,42 +1229,22 @@ mod test { }; use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; + use super::*; use crate::optimizer::OptimizerContext; use crate::test::*; + use crate::Optimizer; use datafusion_expr::test::function_stub::{avg, sum}; - use super::*; - - fn assert_non_optimized_plan_eq( - expected: &str, - plan: LogicalPlan, - config: Option<&dyn OptimizerConfig>, - ) { - assert_eq!(expected, format!("{plan}"), "Unexpected starting plan"); - let optimizer = CommonSubexprEliminate::new(); - let default_config = OptimizerContext::new(); - let config = config.unwrap_or(&default_config); - let optimized_plan = optimizer.rewrite(plan, config).unwrap(); - assert!(!optimized_plan.transformed, "unexpectedly optimize plan"); - let optimized_plan = optimized_plan.data; - assert_eq!( - expected, - format!("{optimized_plan}"), - "Unexpected optimized plan" - ); - } - fn assert_optimized_plan_eq( expected: &str, plan: LogicalPlan, config: Option<&dyn OptimizerConfig>, ) { - let optimizer = CommonSubexprEliminate::new(); + let optimizer = + Optimizer::with_rules(vec![Arc::new(CommonSubexprEliminate::new())]); let default_config = OptimizerContext::new(); let config = config.unwrap_or(&default_config); - let optimized_plan = optimizer.rewrite(plan, config).unwrap(); - assert!(optimized_plan.transformed, "failed to optimize plan"); - let optimized_plan = optimized_plan.data; + let optimized_plan = optimizer.optimize(plan, config, |_, _| ()).unwrap(); let formatted_plan = format!("{optimized_plan}"); assert_eq!(expected, formatted_plan); } @@ -1603,7 +1634,7 @@ mod test { let expected = "Projection: Int32(1) + test.a, test.a + Int32(1)\ \n TableScan: test"; - assert_non_optimized_plan_eq(expected, plan, None); + assert_optimized_plan_eq(expected, plan, None); Ok(()) } @@ -1621,41 +1652,25 @@ mod test { \n Projection: Int32(1) + test.a, test.a\ \n TableScan: test"; - assert_non_optimized_plan_eq(expected, plan, None); + assert_optimized_plan_eq(expected, plan, None); Ok(()) } - fn test_identifier(hash: u64, expr: &Expr) -> Identifier { - Identifier { hash, expr } - } - #[test] fn redundant_project_fields() { let table_scan = test_table_scan().unwrap(); let c_plus_a = col("c") + col("a"); let b_plus_a = col("b") + col("a"); - let common_exprs_1 = CommonExprs::from([ - ( - test_identifier(0, &c_plus_a), - (c_plus_a.clone(), format!("{CSE_PREFIX}_1")), - ), - ( - test_identifier(1, &b_plus_a), - (b_plus_a.clone(), format!("{CSE_PREFIX}_2")), - ), - ]); + let common_exprs_1 = vec![ + (c_plus_a, format!("{CSE_PREFIX}_1")), + (b_plus_a, format!("{CSE_PREFIX}_2")), + ]; let c_plus_a_2 = col(format!("{CSE_PREFIX}_1")); let b_plus_a_2 = col(format!("{CSE_PREFIX}_2")); - let common_exprs_2 = CommonExprs::from([ 
- ( - test_identifier(3, &c_plus_a_2), - (c_plus_a_2.clone(), format!("{CSE_PREFIX}_3")), - ), - ( - test_identifier(4, &b_plus_a_2), - (b_plus_a_2.clone(), format!("{CSE_PREFIX}_4")), - ), - ]); + let common_exprs_2 = vec![ + (c_plus_a_2, format!("{CSE_PREFIX}_3")), + (b_plus_a_2, format!("{CSE_PREFIX}_4")), + ]; let project = build_common_expr_project_plan(table_scan, common_exprs_1).unwrap(); let project_2 = build_common_expr_project_plan(project, common_exprs_2).unwrap(); @@ -1676,28 +1691,16 @@ mod test { .unwrap(); let c_plus_a = col("test1.c") + col("test1.a"); let b_plus_a = col("test1.b") + col("test1.a"); - let common_exprs_1 = CommonExprs::from([ - ( - test_identifier(0, &c_plus_a), - (c_plus_a.clone(), format!("{CSE_PREFIX}_1")), - ), - ( - test_identifier(1, &b_plus_a), - (b_plus_a.clone(), format!("{CSE_PREFIX}_2")), - ), - ]); + let common_exprs_1 = vec![ + (c_plus_a, format!("{CSE_PREFIX}_1")), + (b_plus_a, format!("{CSE_PREFIX}_2")), + ]; let c_plus_a_2 = col(format!("{CSE_PREFIX}_1")); let b_plus_a_2 = col(format!("{CSE_PREFIX}_2")); - let common_exprs_2 = CommonExprs::from([ - ( - test_identifier(3, &c_plus_a_2), - (c_plus_a_2.clone(), format!("{CSE_PREFIX}_3")), - ), - ( - test_identifier(4, &b_plus_a_2), - (b_plus_a_2.clone(), format!("{CSE_PREFIX}_4")), - ), - ]); + let common_exprs_2 = vec![ + (c_plus_a_2, format!("{CSE_PREFIX}_3")), + (b_plus_a_2, format!("{CSE_PREFIX}_4")), + ]; let project = build_common_expr_project_plan(join, common_exprs_1).unwrap(); let project_2 = build_common_expr_project_plan(project, common_exprs_2).unwrap(); @@ -1963,6 +1966,52 @@ mod test { Ok(()) } + #[test] + fn test_non_top_level_common_expression() -> Result<()> { + let table_scan = test_table_scan()?; + + let common_expr = col("a") + col("b"); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![ + common_expr.clone().alias("c1"), + common_expr.alias("c2"), + ])? + .project(vec![col("c1"), col("c2")])? + .build()?; + + let expected = "Projection: c1, c2\ + \n Projection: __common_expr_1 AS c1, __common_expr_1 AS c2\ + \n Projection: test.a + test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_nested_common_expression() -> Result<()> { + let table_scan = test_table_scan()?; + + let nested_common_expr = col("a") + col("b"); + let common_expr = nested_common_expr.clone() * nested_common_expr; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![ + common_expr.clone().alias("c1"), + common_expr.alias("c2"), + ])? 
+ .build()?; + + let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2\ + \n Projection: __common_expr_2 * __common_expr_2 AS __common_expr_1, test.a, test.b, test.c\ + \n Projection: test.a + test.b AS __common_expr_2, test.a, test.b, test.c\ + \n TableScan: test"; + + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + /// returns a "random" function that is marked volatile (aka each invocation /// returns a different value) /// From e088945c38b74bb1d86dcbb88a69dfc21d59e375 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 9 Aug 2024 22:21:21 +0800 Subject: [PATCH 259/357] UDAF refactor: Add PhysicalExpr trait dependency on `datafusion-expr` and remove logical expressions requirement for creating physical aggregate expression (#11845) * init draft Signed-off-by: jayzhan211 * production ready Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * fix merge conflict Signed-off-by: jayzhan211 * mv accumulator out Signed-off-by: jayzhan211 * fix doc Signed-off-by: jayzhan211 * rename Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * doc Signed-off-by: jayzhan211 * fix doc and cleanup Signed-off-by: jayzhan211 * fix doc Signed-off-by: jayzhan211 * clippy + doc Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * rename exprs Signed-off-by: jayzhan211 * rm create_aggregate_expr_with_dfschema Signed-off-by: jayzhan211 * revert change in calc_requirements Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * doc and cleanup Signed-off-by: jayzhan211 * rm dfschema Signed-off-by: jayzhan211 * rm input types Signed-off-by: jayzhan211 * rename return_type Signed-off-by: jayzhan211 * upd doc Signed-off-by: jayzhan211 * move group accumulator adapter to functions-aggregate-common Signed-off-by: jayzhan211 * fix Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- Cargo.toml | 10 +- datafusion-cli/Cargo.lock | 47 +- datafusion/core/Cargo.toml | 1 + datafusion/core/src/lib.rs | 5 + .../combine_partial_final_agg.rs | 2 +- .../src/physical_optimizer/limit_pushdown.rs | 3 +- datafusion/core/src/physical_planner.rs | 39 +- datafusion/core/src/test_util/mod.rs | 2 +- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 2 +- datafusion/expr-common/Cargo.toml | 43 + .../{expr => expr-common}/src/accumulator.rs | 0 .../src/columnar_value.rs | 0 .../src/groups_accumulator.rs | 8 +- .../src/interval_arithmetic.rs | 32 +- .../mod.rs => expr-common/src/lib.rs} | 26 +- .../{expr => expr-common}/src/operator.rs | 206 ----- .../{expr => expr-common}/src/signature.rs | 13 +- .../src/sort_properties.rs | 0 .../src/type_coercion.rs} | 3 +- .../src/type_coercion/aggregates.rs | 2 +- .../src/type_coercion/binary.rs | 4 +- datafusion/expr/Cargo.toml | 5 +- datafusion/expr/src/function.rs | 90 +- datafusion/expr/src/lib.rs | 33 +- datafusion/expr/src/operation.rs | 222 +++++ .../expr/src/type_coercion/functions.rs | 6 +- datafusion/expr/src/type_coercion/mod.rs | 7 +- datafusion/expr/src/udaf.rs | 2 +- datafusion/expr/src/udf.rs | 17 +- datafusion/expr/src/utils.rs | 35 +- .../functions-aggregate-common/Cargo.toml | 46 + .../src/accumulator.rs | 96 +++ .../src/aggregate.rs | 182 ++++ .../src/aggregate/count_distinct.rs} | 0 .../src/aggregate/count_distinct/bytes.rs | 6 +- .../src/aggregate/count_distinct/native.rs | 4 +- .../src/aggregate/groups_accumulator.rs} | 11 +- .../groups_accumulator/accumulate.rs | 6 +- .../aggregate/groups_accumulator/bool_op.rs | 2 +- 
.../aggregate/groups_accumulator/prim_op.rs | 2 +- .../src/lib.rs} | 23 +- .../src}/merge_arrays.rs | 0 .../functions-aggregate-common/src/order.rs | 47 + .../src}/stats.rs | 0 .../src}/tdigest.rs | 0 .../src}/utils.rs | 7 +- datafusion/functions-aggregate/Cargo.toml | 4 +- .../functions-aggregate/benches/count.rs | 11 +- datafusion/functions-aggregate/benches/sum.rs | 11 +- .../src/approx_distinct.rs | 4 +- .../functions-aggregate/src/approx_median.rs | 2 +- .../src/approx_percentile_cont.rs | 138 ++- .../src/approx_percentile_cont_with_weight.rs | 11 +- .../functions-aggregate/src/array_agg.rs | 33 +- datafusion/functions-aggregate/src/average.rs | 34 +- .../functions-aggregate/src/bit_and_or_xor.rs | 8 +- .../functions-aggregate/src/bool_and_or.rs | 10 +- .../functions-aggregate/src/correlation.rs | 2 +- datafusion/functions-aggregate/src/count.rs | 20 +- .../functions-aggregate/src/covariance.rs | 2 +- .../functions-aggregate/src/first_last.rs | 37 +- datafusion/functions-aggregate/src/median.rs | 4 +- datafusion/functions-aggregate/src/min_max.rs | 18 +- .../functions-aggregate/src/nth_value.rs | 51 +- datafusion/functions-aggregate/src/stddev.rs | 31 +- .../functions-aggregate/src/string_agg.rs | 30 +- datafusion/functions-aggregate/src/sum.rs | 22 +- .../functions-aggregate/src/variance.rs | 2 +- datafusion/physical-expr-common/Cargo.toml | 2 +- .../physical-expr-common/src/aggregate/mod.rs | 807 ------------------ .../physical-expr-common/src/binary_map.rs | 2 +- datafusion/physical-expr-common/src/datum.rs | 3 +- .../src/expressions/mod.rs | 23 - datafusion/physical-expr-common/src/lib.rs | 7 +- .../physical-expr-common/src/physical_expr.rs | 39 +- .../physical-expr-common/src/sort_expr.rs | 31 +- datafusion/physical-expr-common/src/utils.rs | 40 +- .../Cargo.toml | 48 ++ .../src/aggregate.rs | 486 +++++++++++ .../src/lib.rs} | 6 +- datafusion/physical-expr/Cargo.toml | 2 + datafusion/physical-expr/benches/case_when.rs | 4 +- datafusion/physical-expr/benches/is_null.rs | 3 +- .../physical-expr/src/equivalence/class.rs | 2 +- .../src/equivalence/properties.rs | 5 +- .../physical-expr/src/expressions/binary.rs | 4 +- .../physical-expr/src/expressions/case.rs | 6 +- .../src/expressions/cast.rs | 10 +- .../src/expressions/column.rs | 33 +- .../src/expressions/literal.rs | 7 +- .../physical-expr/src/expressions/mod.rs | 9 +- datafusion/physical-expr/src/lib.rs | 26 +- datafusion/physical-expr/src/physical_expr.rs | 2 +- datafusion/physical-plan/Cargo.toml | 2 + .../physical-plan/src/aggregates/mod.rs | 199 +---- datafusion/physical-plan/src/lib.rs | 4 +- datafusion/physical-plan/src/union.rs | 2 +- datafusion/physical-plan/src/windows/mod.rs | 22 +- datafusion/proto/src/physical_plan/mod.rs | 2 +- .../tests/cases/roundtrip_physical_plan.rs | 2 +- 100 files changed, 1776 insertions(+), 1846 deletions(-) create mode 100644 datafusion/expr-common/Cargo.toml rename datafusion/{expr => expr-common}/src/accumulator.rs (100%) rename datafusion/{expr => expr-common}/src/columnar_value.rs (100%) rename datafusion/{expr => expr-common}/src/groups_accumulator.rs (97%) rename datafusion/{expr => expr-common}/src/interval_arithmetic.rs (99%) rename datafusion/{physical-expr/src/aggregate/mod.rs => expr-common/src/lib.rs} (56%) rename datafusion/{expr => expr-common}/src/operator.rs (67%) rename datafusion/{expr => expr-common}/src/signature.rs (97%) rename datafusion/{expr => expr-common}/src/sort_properties.rs (100%) rename datafusion/{physical-expr/src/aggregate/stats.rs => 
expr-common/src/type_coercion.rs} (91%) rename datafusion/{expr => expr-common}/src/type_coercion/aggregates.rs (99%) rename datafusion/{expr => expr-common}/src/type_coercion/binary.rs (99%) create mode 100644 datafusion/expr/src/operation.rs create mode 100644 datafusion/functions-aggregate-common/Cargo.toml create mode 100644 datafusion/functions-aggregate-common/src/accumulator.rs create mode 100644 datafusion/functions-aggregate-common/src/aggregate.rs rename datafusion/{physical-expr-common/src/aggregate/count_distinct/mod.rs => functions-aggregate-common/src/aggregate/count_distinct.rs} (100%) rename datafusion/{physical-expr-common => functions-aggregate-common}/src/aggregate/count_distinct/bytes.rs (95%) rename datafusion/{physical-expr-common => functions-aggregate-common}/src/aggregate/count_distinct/native.rs (98%) rename datafusion/{physical-expr/src/aggregate/groups_accumulator/adapter.rs => functions-aggregate-common/src/aggregate/groups_accumulator.rs} (97%) rename datafusion/{physical-expr-common => functions-aggregate-common}/src/aggregate/groups_accumulator/accumulate.rs (99%) rename datafusion/{physical-expr-common => functions-aggregate-common}/src/aggregate/groups_accumulator/bool_op.rs (98%) rename datafusion/{physical-expr-common => functions-aggregate-common}/src/aggregate/groups_accumulator/prim_op.rs (98%) rename datafusion/{physical-expr/src/aggregate/groups_accumulator/mod.rs => functions-aggregate-common/src/lib.rs} (59%) rename datafusion/{physical-expr-common/src/aggregate => functions-aggregate-common/src}/merge_arrays.rs (100%) create mode 100644 datafusion/functions-aggregate-common/src/order.rs rename datafusion/{physical-expr-common/src/aggregate => functions-aggregate-common/src}/stats.rs (100%) rename datafusion/{physical-expr-common/src/aggregate => functions-aggregate-common/src}/tdigest.rs (100%) rename datafusion/{physical-expr-common/src/aggregate => functions-aggregate-common/src}/utils.rs (98%) delete mode 100644 datafusion/physical-expr-common/src/aggregate/mod.rs delete mode 100644 datafusion/physical-expr-common/src/expressions/mod.rs create mode 100644 datafusion/physical-expr-functions-aggregate/Cargo.toml create mode 100644 datafusion/physical-expr-functions-aggregate/src/aggregate.rs rename datafusion/{physical-expr-common/src/aggregate/groups_accumulator/mod.rs => physical-expr-functions-aggregate/src/lib.rs} (87%) rename datafusion/{physical-expr-common => physical-expr}/src/expressions/cast.rs (98%) rename datafusion/{physical-expr-common => physical-expr}/src/expressions/column.rs (82%) rename datafusion/{physical-expr-common => physical-expr}/src/expressions/literal.rs (95%) diff --git a/Cargo.toml b/Cargo.toml index 3431c4673e0cf..02b1f1ccd92a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,13 +23,16 @@ members = [ "datafusion/catalog", "datafusion/core", "datafusion/expr", + "datafusion/expr-common", "datafusion/execution", - "datafusion/functions-aggregate", "datafusion/functions", + "datafusion/functions-aggregate", + "datafusion/functions-aggregate-common", "datafusion/functions-nested", "datafusion/optimizer", - "datafusion/physical-expr-common", "datafusion/physical-expr", + "datafusion/physical-expr-common", + "datafusion/physical-expr-functions-aggregate", "datafusion/physical-optimizer", "datafusion/physical-plan", "datafusion/proto", @@ -94,12 +97,15 @@ datafusion-common = { path = "datafusion/common", version = "41.0.0", default-fe datafusion-common-runtime = { path = "datafusion/common-runtime", version = "41.0.0" } 
datafusion-execution = { path = "datafusion/execution", version = "41.0.0" } datafusion-expr = { path = "datafusion/expr", version = "41.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "41.0.0" } datafusion-functions = { path = "datafusion/functions", version = "41.0.0" } datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "41.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "41.0.0" } datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" } datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } +datafusion-physical-expr-functions-aggregate = { path = "datafusion/physical-expr-functions-aggregate", version = "41.0.0" } datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" } datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" } datafusion-proto = { path = "datafusion/proto", version = "41.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 2eb93da7c020e..134cde8976d67 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1153,6 +1153,7 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", + "datafusion-physical-expr-functions-aggregate", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", @@ -1278,6 +1279,9 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -1285,6 +1289,15 @@ dependencies = [ "strum_macros 0.26.4", ] +[[package]] +name = "datafusion-expr-common" +version = "41.0.0" +dependencies = [ + "arrow", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" version = "41.0.0" @@ -1320,12 +1333,26 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "41.0.0" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" version = "41.0.0" @@ -1380,6 +1407,8 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", @@ -1399,11 +1428,25 @@ dependencies = [ "ahash", "arrow", "datafusion-common", - "datafusion-expr", + "datafusion-expr-common", "hashbrown 0.14.5", "rand", ] +[[package]] +name = "datafusion-physical-expr-functions-aggregate" +version = "41.0.0" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" @@ -1431,8 +1474,10 @@ 
dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", + "datafusion-physical-expr-functions-aggregate", "futures", "half", "hashbrown 0.14.5", diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 0714c3e94a85a..e678c93ede8be 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -109,6 +109,7 @@ datafusion-functions-nested = { workspace = true, optional = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } +datafusion-physical-expr-functions-aggregate = { workspace = true } datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index d4b82f288bdd3..6b3773e4f6d56 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -556,6 +556,11 @@ pub mod physical_expr_common { pub use datafusion_physical_expr_common::*; } +/// re-export of [`datafusion_physical_expr_functions_aggregate`] crate +pub mod physical_expr_functions_aggregate { + pub use datafusion_physical_expr_functions_aggregate::*; +} + /// re-export of [`datafusion_physical_expr`] crate pub mod physical_expr { pub use datafusion_physical_expr::*; diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 843efcc7b0d28..f65a4c837a601 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -177,7 +177,7 @@ mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_physical_expr::expressions::col; - use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; + use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! 
assert_optimized { diff --git a/datafusion/core/src/physical_optimizer/limit_pushdown.rs b/datafusion/core/src/physical_optimizer/limit_pushdown.rs index 4379a34a94267..d02737ff09599 100644 --- a/datafusion/core/src/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/limit_pushdown.rs @@ -258,9 +258,8 @@ mod tests { use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; + use datafusion_physical_expr::expressions::{col, lit}; use datafusion_physical_expr::Partitioning; - use datafusion_physical_expr_common::expressions::column::col; - use datafusion_physical_expr_common::expressions::lit; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::empty::EmptyExec; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index ab0765ac0deb7..7eb468f56eeb1 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -58,7 +58,7 @@ use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::values::ValuesExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - displayable, udaf, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties, + displayable, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, }; @@ -73,7 +73,8 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, physical_name, AggregateFunction, Alias, GroupingSet, WindowFunction, + self, create_function_physical_name, physical_name, AggregateFunction, Alias, + GroupingSet, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -83,6 +84,7 @@ use datafusion_expr::{ }; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; @@ -1559,6 +1561,17 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( order_by, null_treatment, }) => { + let name = if let Some(name) = name { + name + } else { + create_function_physical_name( + func.name(), + *distinct, + args, + order_by.as_ref(), + )? 
+ }; + let physical_args = create_physical_exprs(args, logical_input_schema, execution_props)?; let filter = match filter { @@ -1575,7 +1588,6 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_by) = { - let sort_exprs = order_by.clone().unwrap_or(vec![]); let physical_sort_exprs = match order_by { Some(exprs) => Some(create_physical_sort_exprs( exprs, @@ -1588,18 +1600,15 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( let ordering_reqs: Vec = physical_sort_exprs.clone().unwrap_or(vec![]); - let agg_expr = udaf::create_aggregate_expr_with_dfschema( - func, - &physical_args, - args, - &sort_exprs, - &ordering_reqs, - logical_input_schema, - name, - ignore_nulls, - *distinct, - false, - )?; + let schema: Schema = logical_input_schema.clone().into(); + let agg_expr = + AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec()) + .order_by(ordering_reqs.to_vec()) + .schema(Arc::new(schema)) + .alias(name) + .with_ignore_nulls(ignore_nulls) + .with_distinct(*distinct) + .build()?; (agg_expr, filter, physical_sort_exprs) }; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 937344ef5e4ed..ca8376fdec0a8 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -54,7 +54,7 @@ use datafusion_physical_expr::{ use async_trait::async_trait; use datafusion_catalog::Session; -use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; +use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use futures::Stream; use tempfile::TempDir; // backwards compatibility diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 4cecb0b69335c..138e5bda7f394 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -25,6 +25,7 @@ use arrow::util::pretty::pretty_format_batches; use arrow_array::types::Int64Type; use datafusion::common::Result; use datafusion::datasource::MemTable; +use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -35,7 +36,6 @@ use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor} use datafusion_functions_aggregate::sum::sum_udaf; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; -use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; use datafusion_physical_plan::InputOrderMode; use test_utils::{add_empty_batches, StringBatchGenerator}; diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml new file mode 100644 index 0000000000000..7e477efc4ebc1 --- /dev/null +++ b/datafusion/expr-common/Cargo.toml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-expr-common" +description = "Logical plan and expression representation for DataFusion query engine" +keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_expr_common" +path = "src/lib.rs" + +[features] + +[dependencies] +arrow = { workspace = true } +datafusion-common = { workspace = true } +paste = "^1.0" diff --git a/datafusion/expr/src/accumulator.rs b/datafusion/expr-common/src/accumulator.rs similarity index 100% rename from datafusion/expr/src/accumulator.rs rename to datafusion/expr-common/src/accumulator.rs diff --git a/datafusion/expr/src/columnar_value.rs b/datafusion/expr-common/src/columnar_value.rs similarity index 100% rename from datafusion/expr/src/columnar_value.rs rename to datafusion/expr-common/src/columnar_value.rs diff --git a/datafusion/expr/src/groups_accumulator.rs b/datafusion/expr-common/src/groups_accumulator.rs similarity index 97% rename from datafusion/expr/src/groups_accumulator.rs rename to datafusion/expr-common/src/groups_accumulator.rs index 886bd8443e4d3..e66b27d073d1c 100644 --- a/datafusion/expr/src/groups_accumulator.rs +++ b/datafusion/expr-common/src/groups_accumulator.rs @@ -17,7 +17,7 @@ //! Vectorized [`GroupsAccumulator`] -use arrow_array::{ArrayRef, BooleanArray}; +use arrow::array::{ArrayRef, BooleanArray}; use datafusion_common::{not_impl_err, Result}; /// Describes how many rows should be emitted during grouping. @@ -75,7 +75,7 @@ impl EmitTo { /// expected that each `GroupAccumulator` will use something like `Vec<..>` /// to store the group states. /// -/// [`Accumulator`]: crate::Accumulator +/// [`Accumulator`]: crate::accumulator::Accumulator /// [Aggregating Millions of Groups Fast blog]: https://arrow.apache.org/blog/2023/08/05/datafusion_fast_grouping/ pub trait GroupsAccumulator: Send { /// Updates the accumulator's state from its arguments, encoded as @@ -140,7 +140,7 @@ pub trait GroupsAccumulator: Send { /// See [`Self::evaluate`] for details on the required output /// order and `emit_to`. /// - /// [`Accumulator::state`]: crate::Accumulator::state + /// [`Accumulator::state`]: crate::accumulator::Accumulator::state fn state(&mut self, emit_to: EmitTo) -> Result>; /// Merges intermediate state (the output from [`Self::state`]) @@ -197,7 +197,7 @@ pub trait GroupsAccumulator: Send { /// state directly to the next aggregation phase with minimal processing /// using this method. 
/// - /// [`Accumulator::state`]: crate::Accumulator::state + /// [`Accumulator::state`]: crate::accumulator::Accumulator::state fn convert_to_state( &self, _values: &[ArrayRef], diff --git a/datafusion/expr/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs similarity index 99% rename from datafusion/expr/src/interval_arithmetic.rs rename to datafusion/expr-common/src/interval_arithmetic.rs index 553cdd8c87097..e3ff412e785bd 100644 --- a/datafusion/expr/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -17,16 +17,16 @@ //! Interval arithmetic library +use crate::operator::Operator; use crate::type_coercion::binary::get_result_type; -use crate::Operator; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use std::borrow::Borrow; use std::fmt::{self, Display, Formatter}; use std::ops::{AddAssign, SubAssign}; use arrow::compute::{cast_with_options, CastOptions}; -use arrow::datatypes::DataType; -use arrow::datatypes::{IntervalUnit, TimeUnit}; +use arrow::datatypes::{ + DataType, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, +}; use datafusion_common::rounding::{alter_fp_rounding_mode, next_down, next_up}; use datafusion_common::{internal_err, Result, ScalarValue}; @@ -120,12 +120,12 @@ macro_rules! value_transition { IntervalYearMonth(None) } IntervalDayTime(Some(value)) - if value == arrow_buffer::IntervalDayTime::$bound => + if value == arrow::datatypes::IntervalDayTime::$bound => { IntervalDayTime(None) } IntervalMonthDayNano(Some(value)) - if value == arrow_buffer::IntervalMonthDayNano::$bound => + if value == arrow::datatypes::IntervalMonthDayNano::$bound => { IntervalMonthDayNano(None) } @@ -1135,12 +1135,12 @@ fn next_value_helper(value: ScalarValue) -> ScalarValue { } IntervalDayTime(Some(val)) => IntervalDayTime(Some(increment_decrement::< INC, - arrow_buffer::IntervalDayTime, + arrow::datatypes::IntervalDayTime, >(val))), IntervalMonthDayNano(Some(val)) => { IntervalMonthDayNano(Some(increment_decrement::< INC, - arrow_buffer::IntervalMonthDayNano, + arrow::datatypes::IntervalMonthDayNano, >(val))) } _ => value, // Unbounded values return without change. 
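[Editor's note] For readers following the interval-arithmetic relocation above, here is a minimal sketch, assuming the 41.0.0-era crate layout this commit introduces, of the new import path. The calls (`Interval::make`, `NullableInterval::from`, `single_value`) are taken from the doc examples changed in this hunk; the `main` wrapper and the printed output are illustrative only.

```rust
use datafusion_common::{Result, ScalarValue};
use datafusion_expr_common::interval_arithmetic::{Interval, NullableInterval};

fn main() -> Result<()> {
    // A closed float interval, as in the `satisfy_greater` doc example above.
    let range = Interval::make(Some(-1000.0_f32), Some(1000.0_f32))?;
    println!("{range:?}");

    // A nullable interval wrapping a single known value.
    let value = NullableInterval::from(ScalarValue::Int32(Some(4)));
    assert_eq!(value.single_value(), Some(ScalarValue::Int32(Some(4))));
    Ok(())
}
```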
@@ -1177,7 +1177,7 @@ fn min_of_bounds(first: &ScalarValue, second: &ScalarValue) -> ScalarValue { /// Example usage: /// ``` /// use datafusion_common::DataFusionError; -/// use datafusion_expr::interval_arithmetic::{satisfy_greater, Interval}; +/// use datafusion_expr_common::interval_arithmetic::{satisfy_greater, Interval}; /// /// let left = Interval::make(Some(-1000.0_f32), Some(1000.0_f32))?; /// let right = Interval::make(Some(500.0_f32), Some(2000.0_f32))?; @@ -1552,8 +1552,8 @@ fn cast_scalar_value( /// ``` /// use arrow::datatypes::DataType; /// use datafusion_common::ScalarValue; -/// use datafusion_expr::interval_arithmetic::Interval; -/// use datafusion_expr::interval_arithmetic::NullableInterval; +/// use datafusion_expr_common::interval_arithmetic::Interval; +/// use datafusion_expr_common::interval_arithmetic::NullableInterval; /// /// // [1, 2) U {NULL} /// let maybe_null = NullableInterval::MaybeNull { @@ -1674,9 +1674,9 @@ impl NullableInterval { /// /// ``` /// use datafusion_common::ScalarValue; - /// use datafusion_expr::Operator; - /// use datafusion_expr::interval_arithmetic::Interval; - /// use datafusion_expr::interval_arithmetic::NullableInterval; + /// use datafusion_expr_common::operator::Operator; + /// use datafusion_expr_common::interval_arithmetic::Interval; + /// use datafusion_expr_common::interval_arithmetic::NullableInterval; /// /// // 4 > 3 -> true /// let lhs = NullableInterval::from(ScalarValue::Int32(Some(4))); @@ -1798,8 +1798,8 @@ impl NullableInterval { /// /// ``` /// use datafusion_common::ScalarValue; - /// use datafusion_expr::interval_arithmetic::Interval; - /// use datafusion_expr::interval_arithmetic::NullableInterval; + /// use datafusion_expr_common::interval_arithmetic::Interval; + /// use datafusion_expr_common::interval_arithmetic::NullableInterval; /// /// let interval = NullableInterval::from(ScalarValue::Int32(Some(4))); /// assert_eq!(interval.single_value(), Some(ScalarValue::Int32(Some(4)))); diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/expr-common/src/lib.rs similarity index 56% rename from datafusion/physical-expr/src/aggregate/mod.rs rename to datafusion/expr-common/src/lib.rs index b477a815bf800..179dd75ace85a 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/expr-common/src/lib.rs @@ -15,14 +15,22 @@ // specific language governing permissions and limitations // under the License. -pub(crate) mod groups_accumulator; -pub(crate) mod stats; +//! Logical Expr types and traits for [DataFusion] +//! +//! This crate contains types and traits that are used by both Logical and Physical expressions. +//! They are kept in their own crate to avoid physical expressions depending on logical expressions. +//! +//! +//! 
[DataFusion]: -pub mod utils { - pub use datafusion_physical_expr_common::aggregate::utils::{ - adjust_output_array, down_cast_any_ref, get_accum_scalar_values_as_arrays, - get_sort_options, ordering_fields, DecimalAverager, Hashable, - }; -} +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] -pub use datafusion_physical_expr_common::aggregate::AggregateExpr; +pub mod accumulator; +pub mod columnar_value; +pub mod groups_accumulator; +pub mod interval_arithmetic; +pub mod operator; +pub mod signature; +pub mod sort_properties; +pub mod type_coercion; diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr-common/src/operator.rs similarity index 67% rename from datafusion/expr/src/operator.rs rename to datafusion/expr-common/src/operator.rs index 9bb8c48d6c71f..e013b6fafa22d 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr-common/src/operator.rs @@ -15,14 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Operator module contains foundational types that are used to represent operators in DataFusion. - -use crate::expr_fn::binary_expr; -use crate::Expr; -use crate::Like; use std::fmt; -use std::ops; -use std::ops::Not; /// Operators applied to expressions #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash)] @@ -287,202 +280,3 @@ impl fmt::Display for Operator { write!(f, "{display}") } } - -/// Support ` + ` fluent style -impl ops::Add for Expr { - type Output = Self; - - fn add(self, rhs: Self) -> Self { - binary_expr(self, Operator::Plus, rhs) - } -} - -/// Support ` - ` fluent style -impl ops::Sub for Expr { - type Output = Self; - - fn sub(self, rhs: Self) -> Self { - binary_expr(self, Operator::Minus, rhs) - } -} - -/// Support ` * ` fluent style -impl ops::Mul for Expr { - type Output = Self; - - fn mul(self, rhs: Self) -> Self { - binary_expr(self, Operator::Multiply, rhs) - } -} - -/// Support ` / ` fluent style -impl ops::Div for Expr { - type Output = Self; - - fn div(self, rhs: Self) -> Self { - binary_expr(self, Operator::Divide, rhs) - } -} - -/// Support ` % ` fluent style -impl ops::Rem for Expr { - type Output = Self; - - fn rem(self, rhs: Self) -> Self { - binary_expr(self, Operator::Modulo, rhs) - } -} - -/// Support ` & ` fluent style -impl ops::BitAnd for Expr { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self { - binary_expr(self, Operator::BitwiseAnd, rhs) - } -} - -/// Support ` | ` fluent style -impl ops::BitOr for Expr { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self { - binary_expr(self, Operator::BitwiseOr, rhs) - } -} - -/// Support ` ^ ` fluent style -impl ops::BitXor for Expr { - type Output = Self; - - fn bitxor(self, rhs: Self) -> Self { - binary_expr(self, Operator::BitwiseXor, rhs) - } -} - -/// Support ` << ` fluent style -impl ops::Shl for Expr { - type Output = Self; - - fn shl(self, rhs: Self) -> Self::Output { - binary_expr(self, Operator::BitwiseShiftLeft, rhs) - } -} - -/// Support ` >> ` fluent style -impl ops::Shr for Expr { - type Output = Self; - - fn shr(self, rhs: Self) -> Self::Output { - binary_expr(self, Operator::BitwiseShiftRight, rhs) - } -} - -/// Support `- ` fluent style -impl ops::Neg for Expr { - type Output = Self; - - fn neg(self) -> Self::Output { - Expr::Negative(Box::new(self)) - } -} - -/// Support `NOT ` fluent style -impl Not for Expr { - type Output = Self; - - fn not(self) -> Self::Output { - match self { - Expr::Like(Like { - negated, - expr, - 
pattern, - escape_char, - case_insensitive, - }) => Expr::Like(Like::new( - !negated, - expr, - pattern, - escape_char, - case_insensitive, - )), - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => Expr::SimilarTo(Like::new( - !negated, - expr, - pattern, - escape_char, - case_insensitive, - )), - _ => Expr::Not(Box::new(self)), - } - } -} - -#[cfg(test)] -mod tests { - use crate::lit; - - #[test] - fn test_operators() { - // Add - assert_eq!( - format!("{}", lit(1u32) + lit(2u32)), - "UInt32(1) + UInt32(2)" - ); - // Sub - assert_eq!( - format!("{}", lit(1u32) - lit(2u32)), - "UInt32(1) - UInt32(2)" - ); - // Mul - assert_eq!( - format!("{}", lit(1u32) * lit(2u32)), - "UInt32(1) * UInt32(2)" - ); - // Div - assert_eq!( - format!("{}", lit(1u32) / lit(2u32)), - "UInt32(1) / UInt32(2)" - ); - // Rem - assert_eq!( - format!("{}", lit(1u32) % lit(2u32)), - "UInt32(1) % UInt32(2)" - ); - // BitAnd - assert_eq!( - format!("{}", lit(1u32) & lit(2u32)), - "UInt32(1) & UInt32(2)" - ); - // BitOr - assert_eq!( - format!("{}", lit(1u32) | lit(2u32)), - "UInt32(1) | UInt32(2)" - ); - // BitXor - assert_eq!( - format!("{}", lit(1u32) ^ lit(2u32)), - "UInt32(1) BIT_XOR UInt32(2)" - ); - // Shl - assert_eq!( - format!("{}", lit(1u32) << lit(2u32)), - "UInt32(1) << UInt32(2)" - ); - // Shr - assert_eq!( - format!("{}", lit(1u32) >> lit(2u32)), - "UInt32(1) >> UInt32(2)" - ); - // Neg - assert_eq!(format!("{}", -lit(1u32)), "(- UInt32(1))"); - // Not - assert_eq!(format!("{}", !lit(1u32)), "NOT UInt32(1)"); - } -} diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr-common/src/signature.rs similarity index 97% rename from datafusion/expr/src/signature.rs rename to datafusion/expr-common/src/signature.rs index 577c663142a1e..4dcfa423e3718 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -75,7 +75,7 @@ pub enum Volatility { /// /// ``` /// # use arrow::datatypes::{DataType, TimeUnit}; -/// # use datafusion_expr::{TIMEZONE_WILDCARD, TypeSignature}; +/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature}; /// let type_signature = TypeSignature::Exact(vec![ /// // A nanosecond precision timestamp with ANY timezone /// // matches Timestamp(Nanosecond, Some("+0:00")) @@ -93,9 +93,7 @@ pub enum TypeSignature { Variadic(Vec), /// The acceptable signature and coercions rules to coerce arguments to this /// signature are special for this function. If this signature is specified, - /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types. - /// - /// [`ScalarUDFImpl::coerce_types`]: crate::udf::ScalarUDFImpl::coerce_types + /// DataFusion will call `ScalarUDFImpl::coerce_types` to prepare argument types. UserDefined, /// One or more arguments with arbitrary types VariadicAny, @@ -176,7 +174,7 @@ impl std::fmt::Display for ArrayFunctionSignature { } impl TypeSignature { - pub(crate) fn to_string_repr(&self) -> Vec { + pub fn to_string_repr(&self) -> Vec { match self { TypeSignature::Variadic(types) => { vec![format!("{}, ..", Self::join_types(types, "/"))] @@ -213,10 +211,7 @@ impl TypeSignature { } /// Helper function to join types with specified delimiter. 
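[Editor's note] As a quick illustration of the `signature.rs` move above, the sketch below, assuming the new `datafusion-expr-common` crate, builds the same timestamp signature shown in the updated doc example and calls the now-public `to_string_repr`. The `main` wrapper is illustrative only.

```rust
use arrow::datatypes::{DataType, TimeUnit};
use datafusion_expr_common::signature::{TypeSignature, TIMEZONE_WILDCARD};

fn main() {
    // A nanosecond-precision timestamp argument with any time zone,
    // matching e.g. Timestamp(Nanosecond, Some("+0:00")).
    let type_signature = TypeSignature::Exact(vec![DataType::Timestamp(
        TimeUnit::Nanosecond,
        Some(TIMEZONE_WILDCARD.into()),
    )]);
    // `to_string_repr` is `pub` after this change, so callers outside the
    // crate can render the accepted argument types, e.g. for error messages.
    println!("{:?}", type_signature.to_string_repr());
}
```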
- pub(crate) fn join_types( - types: &[T], - delimiter: &str, - ) -> String { + pub fn join_types(types: &[T], delimiter: &str) -> String { types .iter() .map(|t| t.to_string()) diff --git a/datafusion/expr/src/sort_properties.rs b/datafusion/expr-common/src/sort_properties.rs similarity index 100% rename from datafusion/expr/src/sort_properties.rs rename to datafusion/expr-common/src/sort_properties.rs diff --git a/datafusion/physical-expr/src/aggregate/stats.rs b/datafusion/expr-common/src/type_coercion.rs similarity index 91% rename from datafusion/physical-expr/src/aggregate/stats.rs rename to datafusion/expr-common/src/type_coercion.rs index d9338f5a962f7..e934c6eaf35b1 100644 --- a/datafusion/physical-expr/src/aggregate/stats.rs +++ b/datafusion/expr-common/src/type_coercion.rs @@ -15,4 +15,5 @@ // specific language governing permissions and limitations // under the License. -pub use datafusion_physical_expr_common::aggregate::stats::StatsType; +pub mod aggregates; +pub mod binary; diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr-common/src/type_coercion/aggregates.rs similarity index 99% rename from datafusion/expr/src/type_coercion/aggregates.rs rename to datafusion/expr-common/src/type_coercion/aggregates.rs index e7e58bf84362e..40ee596eee05a 100644 --- a/datafusion/expr/src/type_coercion/aggregates.rs +++ b/datafusion/expr-common/src/type_coercion/aggregates.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::TypeSignature; +use crate::signature::TypeSignature; use arrow::datatypes::{ DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs similarity index 99% rename from datafusion/expr/src/type_coercion/binary.rs rename to datafusion/expr-common/src/type_coercion/binary.rs index 6de0118f6bae7..05e365a0b9885 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -20,7 +20,7 @@ use std::collections::HashSet; use std::sync::Arc; -use crate::Operator; +use crate::operator::Operator; use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; @@ -569,7 +569,7 @@ fn string_temporal_coercion( } /// Coerce `lhs_type` and `rhs_type` to a common type where both are numeric -pub(crate) fn binary_numeric_coercion( +pub fn binary_numeric_coercion( lhs_type: &DataType, rhs_type: &DataType, ) -> Option { diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 1b6878b6f49e8..b5d34d9a3834f 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -43,7 +43,10 @@ arrow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } chrono = { workspace = true } -datafusion-common = { workspace = true, default-features = true } +datafusion-common = { workspace = true } +datafusion-expr-common = { workspace = true } +datafusion-functions-aggregate-common = { workspace = true } +datafusion-physical-expr-common = { workspace = true } paste = "^1.0" serde_json = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index d8be2b4347323..cd7a0c8aa918c 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -18,11 +18,15 @@ //! 
Function module contains typing and signature for built-in and user defined functions. use crate::ColumnarValue; -use crate::{Accumulator, Expr, PartitionEvaluator}; -use arrow::datatypes::{DataType, Field, Schema}; -use datafusion_common::{DFSchema, Result}; +use crate::{Expr, PartitionEvaluator}; +use arrow::datatypes::DataType; +use datafusion_common::Result; use std::sync::Arc; +pub use datafusion_functions_aggregate_common::accumulator::{ + AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, +}; + #[derive(Debug, Clone, Copy)] pub enum Hint { /// Indicates the argument needs to be padded if it is scalar @@ -46,86 +50,6 @@ pub type ScalarFunctionImplementation = pub type ReturnTypeFunction = Arc Result> + Send + Sync>; -/// [`AccumulatorArgs`] contains information about how an aggregate -/// function was called, including the types of its arguments and any optional -/// ordering expressions. -#[derive(Debug)] -pub struct AccumulatorArgs<'a> { - /// The return type of the aggregate function. - pub data_type: &'a DataType, - - /// The schema of the input arguments - pub schema: &'a Schema, - - /// The schema of the input arguments - pub dfschema: &'a DFSchema, - - /// Whether to ignore nulls. - /// - /// SQL allows the user to specify `IGNORE NULLS`, for example: - /// - /// ```sql - /// SELECT FIRST_VALUE(column1) IGNORE NULLS FROM t; - /// ``` - pub ignore_nulls: bool, - - /// The expressions in the `ORDER BY` clause passed to this aggregator. - /// - /// SQL allows the user to specify the ordering of arguments to the - /// aggregate using an `ORDER BY`. For example: - /// - /// ```sql - /// SELECT FIRST_VALUE(column1 ORDER BY column2) FROM t; - /// ``` - /// - /// If no `ORDER BY` is specified, `sort_exprs`` will be empty. - pub sort_exprs: &'a [Expr], - - /// Whether the aggregation is running in reverse order - pub is_reversed: bool, - - /// The name of the aggregate expression - pub name: &'a str, - - /// Whether the aggregate function is distinct. - /// - /// ```sql - /// SELECT COUNT(DISTINCT column1) FROM t; - /// ``` - pub is_distinct: bool, - - /// The input types of the aggregate function. - pub input_types: &'a [DataType], - - /// The logical expression of arguments the aggregate function takes. - pub input_exprs: &'a [Expr], -} - -/// [`StateFieldsArgs`] contains information about the fields that an -/// aggregate function's accumulator should have. Used for [`AggregateUDFImpl::state_fields`]. -/// -/// [`AggregateUDFImpl::state_fields`]: crate::udaf::AggregateUDFImpl::state_fields -pub struct StateFieldsArgs<'a> { - /// The name of the aggregate function. - pub name: &'a str, - - /// The input types of the aggregate function. - pub input_types: &'a [DataType], - - /// The return type of the aggregate function. - pub return_type: &'a DataType, - - /// The ordering fields of the aggregate function. - pub ordering_fields: &'a [Field], - - /// Whether the aggregate function is distinct. - pub is_distinct: bool, -} - -/// Factory that returns an accumulator for the given aggregate function. -pub type AccumulatorFactoryFunction = - Arc Result> + Send + Sync>; - /// Factory that creates a PartitionEvaluator for the given window /// function pub type PartitionEvaluatorFactory = diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index f5460918fa707..260065f69af98 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -27,13 +27,10 @@ //! //! The [expr_fn] module contains functions for creating expressions. 
-mod accumulator; mod built_in_window_function; -mod columnar_value; mod literal; -mod operator; +mod operation; mod partition_evaluator; -mod signature; mod table_source; mod udaf; mod udf; @@ -46,13 +43,20 @@ pub mod expr_fn; pub mod expr_rewriter; pub mod expr_schema; pub mod function; -pub mod groups_accumulator; -pub mod interval_arithmetic; +pub mod groups_accumulator { + pub use datafusion_expr_common::groups_accumulator::*; +} + +pub mod interval_arithmetic { + pub use datafusion_expr_common::interval_arithmetic::*; +} pub mod logical_plan; pub mod planner; pub mod registry; pub mod simplify; -pub mod sort_properties; +pub mod sort_properties { + pub use datafusion_expr_common::sort_properties::*; +} pub mod test; pub mod tree_node; pub mod type_coercion; @@ -62,9 +66,15 @@ pub mod window_frame; pub mod window_function; pub mod window_state; -pub use accumulator::Accumulator; pub use built_in_window_function::BuiltInWindowFunction; -pub use columnar_value::ColumnarValue; +pub use datafusion_expr_common::accumulator::Accumulator; +pub use datafusion_expr_common::columnar_value::ColumnarValue; +pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; +pub use datafusion_expr_common::operator::Operator; +pub use datafusion_expr_common::signature::{ + ArrayFunctionSignature, Signature, TypeSignature, Volatility, TIMEZONE_WILDCARD, +}; +pub use datafusion_expr_common::type_coercion::binary; pub use expr::{ Between, BinaryExpr, Case, Cast, Expr, GetFieldAccess, GroupingSet, Like, Sort as SortExpr, TryCast, WindowFunctionDefinition, @@ -75,14 +85,9 @@ pub use function::{ AccumulatorFactoryFunction, PartitionEvaluatorFactory, ReturnTypeFunction, ScalarFunctionImplementation, StateTypeFunction, }; -pub use groups_accumulator::{EmitTo, GroupsAccumulator}; pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral}; pub use logical_plan::*; -pub use operator::Operator; pub use partition_evaluator::PartitionEvaluator; -pub use signature::{ - ArrayFunctionSignature, Signature, TypeSignature, Volatility, TIMEZONE_WILDCARD, -}; pub use sqlparser; pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF}; diff --git a/datafusion/expr/src/operation.rs b/datafusion/expr/src/operation.rs new file mode 100644 index 0000000000000..6b79a8248b293 --- /dev/null +++ b/datafusion/expr/src/operation.rs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains implementations of operations (unary, binary etc.) for DataFusion expressions. 
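[Editor's note] Because the fluent operator impls move into the new `operation.rs` module while `Operator` itself moves to `datafusion-expr-common`, a short sketch of the unchanged user-facing behaviour may help. It relies only on the re-exports added to `expr/src/lib.rs` above and on display strings that appear in the tests later in this file; the `main` wrapper is illustrative.

```rust
use datafusion_expr::{lit, Expr, Operator};

fn main() {
    // The `Add` impl now lives in `expr/src/operation.rs`, but the fluent
    // style and its rendering are unchanged.
    let expr: Expr = lit(1u32) + lit(2u32);
    assert_eq!(format!("{expr}"), "UInt32(1) + UInt32(2)");

    // `Operator` is re-exported from `datafusion_expr_common::operator`.
    assert_eq!(Operator::Plus.to_string(), "+");
}
```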
+ +use crate::expr_fn::binary_expr; +use crate::{Expr, Like}; +use datafusion_expr_common::operator::Operator; +use std::ops::{self, Not}; + +/// Support ` + ` fluent style +impl ops::Add for Expr { + type Output = Self; + + fn add(self, rhs: Self) -> Self { + binary_expr(self, Operator::Plus, rhs) + } +} + +/// Support ` - ` fluent style +impl ops::Sub for Expr { + type Output = Self; + + fn sub(self, rhs: Self) -> Self { + binary_expr(self, Operator::Minus, rhs) + } +} + +/// Support ` * ` fluent style +impl ops::Mul for Expr { + type Output = Self; + + fn mul(self, rhs: Self) -> Self { + binary_expr(self, Operator::Multiply, rhs) + } +} + +/// Support ` / ` fluent style +impl ops::Div for Expr { + type Output = Self; + + fn div(self, rhs: Self) -> Self { + binary_expr(self, Operator::Divide, rhs) + } +} + +/// Support ` % ` fluent style +impl ops::Rem for Expr { + type Output = Self; + + fn rem(self, rhs: Self) -> Self { + binary_expr(self, Operator::Modulo, rhs) + } +} + +/// Support ` & ` fluent style +impl ops::BitAnd for Expr { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self { + binary_expr(self, Operator::BitwiseAnd, rhs) + } +} + +/// Support ` | ` fluent style +impl ops::BitOr for Expr { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + binary_expr(self, Operator::BitwiseOr, rhs) + } +} + +/// Support ` ^ ` fluent style +impl ops::BitXor for Expr { + type Output = Self; + + fn bitxor(self, rhs: Self) -> Self { + binary_expr(self, Operator::BitwiseXor, rhs) + } +} + +/// Support ` << ` fluent style +impl ops::Shl for Expr { + type Output = Self; + + fn shl(self, rhs: Self) -> Self::Output { + binary_expr(self, Operator::BitwiseShiftLeft, rhs) + } +} + +/// Support ` >> ` fluent style +impl ops::Shr for Expr { + type Output = Self; + + fn shr(self, rhs: Self) -> Self::Output { + binary_expr(self, Operator::BitwiseShiftRight, rhs) + } +} + +/// Support `- ` fluent style +impl ops::Neg for Expr { + type Output = Self; + + fn neg(self) -> Self::Output { + Expr::Negative(Box::new(self)) + } +} + +/// Support `NOT ` fluent style +impl Not for Expr { + type Output = Self; + + fn not(self) -> Self::Output { + match self { + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => Expr::Like(Like::new( + !negated, + expr, + pattern, + escape_char, + case_insensitive, + )), + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => Expr::SimilarTo(Like::new( + !negated, + expr, + pattern, + escape_char, + case_insensitive, + )), + _ => Expr::Not(Box::new(self)), + } + } +} + +#[cfg(test)] +mod tests { + use crate::lit; + + #[test] + fn test_operators() { + // Add + assert_eq!( + format!("{}", lit(1u32) + lit(2u32)), + "UInt32(1) + UInt32(2)" + ); + // Sub + assert_eq!( + format!("{}", lit(1u32) - lit(2u32)), + "UInt32(1) - UInt32(2)" + ); + // Mul + assert_eq!( + format!("{}", lit(1u32) * lit(2u32)), + "UInt32(1) * UInt32(2)" + ); + // Div + assert_eq!( + format!("{}", lit(1u32) / lit(2u32)), + "UInt32(1) / UInt32(2)" + ); + // Rem + assert_eq!( + format!("{}", lit(1u32) % lit(2u32)), + "UInt32(1) % UInt32(2)" + ); + // BitAnd + assert_eq!( + format!("{}", lit(1u32) & lit(2u32)), + "UInt32(1) & UInt32(2)" + ); + // BitOr + assert_eq!( + format!("{}", lit(1u32) | lit(2u32)), + "UInt32(1) | UInt32(2)" + ); + // BitXor + assert_eq!( + format!("{}", lit(1u32) ^ lit(2u32)), + "UInt32(1) BIT_XOR UInt32(2)" + ); + // Shl + assert_eq!( + format!("{}", lit(1u32) << lit(2u32)), + "UInt32(1) << 
UInt32(2)" + ); + // Shr + assert_eq!( + format!("{}", lit(1u32) >> lit(2u32)), + "UInt32(1) >> UInt32(2)" + ); + // Neg + assert_eq!(format!("{}", -lit(1u32)), "(- UInt32(1))"); + // Not + assert_eq!(format!("{}", !lit(1u32)), "NOT UInt32(1)"); + } +} diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 4f2776516d3e4..190374b01dd24 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -17,9 +17,6 @@ use std::sync::Arc; -use crate::signature::{ - ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD, -}; use crate::{AggregateUDF, ScalarUDF, Signature, TypeSignature}; use arrow::{ compute::can_cast_types, @@ -29,6 +26,9 @@ use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, plan_err, Result, }; +use datafusion_expr_common::signature::{ + ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD, +}; use super::binary::{binary_numeric_coercion, comparison_coercion}; diff --git a/datafusion/expr/src/type_coercion/mod.rs b/datafusion/expr/src/type_coercion/mod.rs index e0d1236aac2d6..3a5c65fb46ee4 100644 --- a/datafusion/expr/src/type_coercion/mod.rs +++ b/datafusion/expr/src/type_coercion/mod.rs @@ -31,11 +31,14 @@ //! i64. However, i64 -> i32 is never performed as there are i64 //! values which can not be represented by i32 values. -pub mod aggregates; -pub mod binary; +pub mod aggregates { + pub use datafusion_expr_common::type_coercion::aggregates::*; +} pub mod functions; pub mod other; +pub use datafusion_expr_common::type_coercion::binary; + use arrow::datatypes::DataType; /// Determine whether the given data type `dt` represents signed numeric values. pub fn is_signed_numeric(dt: &DataType) -> bool { diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 3a292b2b49bfb..d136aeaf09087 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -422,7 +422,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// /// See [retract_batch] for more details. /// - /// [retract_batch]: crate::accumulator::Accumulator::retract_batch + /// [retract_batch]: datafusion_expr_common::accumulator::Accumulator::retract_batch fn create_sliding_accumulator( &self, args: AccumulatorArgs, diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 34b5909f0a5a3..f5434726e23d7 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -17,22 +17,19 @@ //! [`ScalarUDF`]: Scalar User Defined Functions -use std::any::Any; -use std::fmt::{self, Debug, Formatter}; -use std::hash::{DefaultHasher, Hash, Hasher}; -use std::sync::Arc; - -use arrow::datatypes::DataType; - -use datafusion_common::{not_impl_err, ExprSchema, Result}; - use crate::expr::schema_name_from_exprs_comma_seperated_without_space; -use crate::interval_arithmetic::Interval; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::{ ColumnarValue, Expr, ReturnTypeFunction, ScalarFunctionImplementation, Signature, }; +use arrow::datatypes::DataType; +use datafusion_common::{not_impl_err, ExprSchema, Result}; +use datafusion_expr_common::interval_arithmetic::Interval; +use std::any::Any; +use std::fmt::{self, Debug, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::sync::Arc; /// Logical representation of a Scalar User Defined Function. 
/// diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index c3e4505ed19c7..7b650d1ab448b 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -23,10 +23,10 @@ use std::sync::Arc; use crate::expr::{Alias, Sort, WindowFunction}; use crate::expr_rewriter::strip_outer_reference; -use crate::signature::{Signature, TypeSignature}; use crate::{ and, BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, }; +use datafusion_expr_common::signature::{Signature, TypeSignature}; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion_common::tree_node::{ @@ -40,6 +40,8 @@ use datafusion_common::{ use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions}; +pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; + /// The value to which `COUNT(*)` is expanded to in /// `COUNT()` expressions pub use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; @@ -1219,37 +1221,6 @@ pub fn format_state_name(name: &str, state_name: &str) -> String { format!("{name}[{state_name}]") } -/// Represents the sensitivity of an aggregate expression to ordering. -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum AggregateOrderSensitivity { - /// Indicates that the aggregate expression is insensitive to ordering. - /// Ordering at the input is not important for the result of the aggregator. - Insensitive, - /// Indicates that the aggregate expression has a hard requirement on ordering. - /// The aggregator can not produce a correct result unless its ordering - /// requirement is satisfied. - HardRequirement, - /// Indicates that ordering is beneficial for the aggregate expression in terms - /// of evaluation efficiency. The aggregator can produce its result efficiently - /// when its required ordering is satisfied; however, it can still produce the - /// correct result (albeit less efficiently) when its required ordering is not met. - Beneficial, -} - -impl AggregateOrderSensitivity { - pub fn is_insensitive(&self) -> bool { - self.eq(&AggregateOrderSensitivity::Insensitive) - } - - pub fn is_beneficial(&self) -> bool { - self.eq(&AggregateOrderSensitivity::Beneficial) - } - - pub fn hard_requires(&self) -> bool { - self.eq(&AggregateOrderSensitivity::HardRequirement) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions-aggregate-common/Cargo.toml b/datafusion/functions-aggregate-common/Cargo.toml new file mode 100644 index 0000000000000..a8296ce11f30d --- /dev/null +++ b/datafusion/functions-aggregate-common/Cargo.toml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
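[Editor's note] One compatibility note on the `AggregateOrderSensitivity` move above: the enum now lives in the new `datafusion-functions-aggregate-common` crate but remains re-exported from `datafusion_expr::utils`, so a sketch like the following (variant and method names taken from the removed code above, `main` wrapper illustrative) should keep compiling.

```rust
use datafusion_expr::utils::AggregateOrderSensitivity;

fn main() {
    // Same semantics as before the move: ordering helps but is not required.
    let sensitivity = AggregateOrderSensitivity::Beneficial;
    assert!(sensitivity.is_beneficial());
    assert!(!sensitivity.hard_requires());
    assert!(!sensitivity.is_insensitive());
}
```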
+ +[package] +name = "datafusion-functions-aggregate-common" +description = "Utility functions for implementing aggregate functions for the DataFusion query engine" +keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_functions_aggregate_common" +path = "src/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +ahash = { workspace = true } +arrow = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr-common = { workspace = true } +datafusion-physical-expr-common = { workspace = true } +rand = { workspace = true } diff --git a/datafusion/functions-aggregate-common/src/accumulator.rs b/datafusion/functions-aggregate-common/src/accumulator.rs new file mode 100644 index 0000000000000..ddf0085b9de4c --- /dev/null +++ b/datafusion/functions-aggregate-common/src/accumulator.rs @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::{DataType, Field, Schema}; +use datafusion_common::Result; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_physical_expr_common::{ + physical_expr::PhysicalExpr, sort_expr::PhysicalSortExpr, +}; +use std::sync::Arc; + +/// [`AccumulatorArgs`] contains information about how an aggregate +/// function was called, including the types of its arguments and any optional +/// ordering expressions. +#[derive(Debug)] +pub struct AccumulatorArgs<'a> { + /// The return type of the aggregate function. + pub return_type: &'a DataType, + + /// The schema of the input arguments + pub schema: &'a Schema, + + /// Whether to ignore nulls. + /// + /// SQL allows the user to specify `IGNORE NULLS`, for example: + /// + /// ```sql + /// SELECT FIRST_VALUE(column1) IGNORE NULLS FROM t; + /// ``` + pub ignore_nulls: bool, + + /// The expressions in the `ORDER BY` clause passed to this aggregator. + /// + /// SQL allows the user to specify the ordering of arguments to the + /// aggregate using an `ORDER BY`. For example: + /// + /// ```sql + /// SELECT FIRST_VALUE(column1 ORDER BY column2) FROM t; + /// ``` + /// + /// If no `ORDER BY` is specified, `ordering_req` will be empty. + pub ordering_req: &'a [PhysicalSortExpr], + + /// Whether the aggregation is running in reverse order + pub is_reversed: bool, + + /// The name of the aggregate expression + pub name: &'a str, + + /// Whether the aggregate function is distinct. 
+ /// + /// ```sql + /// SELECT COUNT(DISTINCT column1) FROM t; + /// ``` + pub is_distinct: bool, + + /// The physical expression of arguments the aggregate function takes. + pub exprs: &'a [Arc], +} + +/// Factory that returns an accumulator for the given aggregate function. +pub type AccumulatorFactoryFunction = + Arc Result> + Send + Sync>; + +/// [`StateFieldsArgs`] contains information about the fields that an +/// aggregate function's accumulator should have. Used for `AggregateUDFImpl::state_fields`. +pub struct StateFieldsArgs<'a> { + /// The name of the aggregate function. + pub name: &'a str, + + /// The input types of the aggregate function. + pub input_types: &'a [DataType], + + /// The return type of the aggregate function. + pub return_type: &'a DataType, + + /// The ordering fields of the aggregate function. + pub ordering_fields: &'a [Field], + + /// Whether the aggregate function is distinct. + pub is_distinct: bool, +} diff --git a/datafusion/functions-aggregate-common/src/aggregate.rs b/datafusion/functions-aggregate-common/src/aggregate.rs new file mode 100644 index 0000000000000..016e54e688357 --- /dev/null +++ b/datafusion/functions-aggregate-common/src/aggregate.rs @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`AggregateExpr`] which defines the interface all aggregate expressions +//! (built-in and custom) need to satisfy. + +use crate::order::AggregateOrderSensitivity; +use arrow::datatypes::Field; +use datafusion_common::exec_err; +use datafusion_common::{not_impl_err, Result}; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_expr_common::groups_accumulator::GroupsAccumulator; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use std::fmt::Debug; +use std::{any::Any, sync::Arc}; + +pub mod count_distinct; +pub mod groups_accumulator; + +/// An aggregate expression that: +/// * knows its resulting field +/// * knows how to create its accumulator +/// * knows its accumulator's state's field +/// * knows the expressions from whose its accumulator will receive values +/// +/// Any implementation of this trait also needs to implement the +/// `PartialEq` to allows comparing equality between the +/// trait objects. +pub trait AggregateExpr: Send + Sync + Debug + PartialEq { + /// Returns the aggregate expression as [`Any`] so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// the field of the final result of this aggregation. + fn field(&self) -> Result; + + /// the accumulator used to accumulate values from the expressions. 
+ /// the accumulator expects the same number of arguments as `expressions` and must + /// return states with the same description as `state_fields` + fn create_accumulator(&self) -> Result>; + + /// the fields that encapsulate the Accumulator's state + /// the number of fields here equals the number of states that the accumulator contains + fn state_fields(&self) -> Result>; + + /// expressions that are passed to the Accumulator. + /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. + fn expressions(&self) -> Vec>; + + /// Order by requirements for the aggregate function + /// By default it is `None` (there is no requirement) + /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this + fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + None + } + + /// Indicates whether aggregator can produce the correct result with any + /// arbitrary input ordering. By default, we assume that aggregate expressions + /// are order insensitive. + fn order_sensitivity(&self) -> AggregateOrderSensitivity { + AggregateOrderSensitivity::Insensitive + } + + /// Sets the indicator whether ordering requirements of the aggregator is + /// satisfied by its input. If this is not the case, aggregators with order + /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce + /// the correct result with possibly more work internally. + /// + /// # Returns + /// + /// Returns `Ok(Some(updated_expr))` if the process completes successfully. + /// If the expression can benefit from existing input ordering, but does + /// not implement the method, returns an error. Order insensitive and hard + /// requirement aggregators return `Ok(None)`. + fn with_beneficial_ordering( + self: Arc, + _requirement_satisfied: bool, + ) -> Result>> { + if self.order_bys().is_some() && self.order_sensitivity().is_beneficial() { + return exec_err!( + "Should implement with satisfied for aggregator :{:?}", + self.name() + ); + } + Ok(None) + } + + /// Human readable name such as `"MIN(c2)"`. The default + /// implementation returns placeholder text. + fn name(&self) -> &str { + "AggregateExpr: default name" + } + + /// If the aggregate expression has a specialized + /// [`GroupsAccumulator`] implementation. If this returns true, + /// `[Self::create_groups_accumulator`] will be called. + fn groups_accumulator_supported(&self) -> bool { + false + } + + /// Return a specialized [`GroupsAccumulator`] that manages state + /// for all groups. + /// + /// For maximum performance, a [`GroupsAccumulator`] should be + /// implemented in addition to [`Accumulator`]. + fn create_groups_accumulator(&self) -> Result> { + not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") + } + + /// Construct an expression that calculates the aggregate in reverse. + /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). + /// For aggregates that do not support calculation in reverse, + /// returns None (which is the default value). + fn reverse_expr(&self) -> Option> { + None + } + + /// Creates accumulator implementation that supports retract + fn create_sliding_accumulator(&self) -> Result> { + not_impl_err!("Retractable Accumulator hasn't been implemented for {self:?} yet") + } + + /// Returns all expressions used in the [`AggregateExpr`]. + /// These expressions are (1)function arguments, (2) order by expressions. 
+ fn all_expressions(&self) -> AggregatePhysicalExpressions { + let args = self.expressions(); + let order_bys = self.order_bys().unwrap_or(&[]); + let order_by_exprs = order_bys + .iter() + .map(|sort_expr| Arc::clone(&sort_expr.expr)) + .collect::>(); + AggregatePhysicalExpressions { + args, + order_by_exprs, + } + } + + /// Rewrites [`AggregateExpr`], with new expressions given. The argument should be consistent + /// with the return value of the [`AggregateExpr::all_expressions`] method. + /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. + fn with_new_expressions( + &self, + _args: Vec>, + _order_by_exprs: Vec>, + ) -> Option> { + None + } + + /// If this function is max, return (output_field, true) + /// if the function is min, return (output_field, false) + /// otherwise return None (the default) + /// + /// output_field is the name of the column produced by this aggregate + /// + /// Note: this is used to use special aggregate implementations in certain conditions + fn get_minmax_desc(&self) -> Option<(Field, bool)> { + None + } +} + +/// Stores the physical expressions used inside the `AggregateExpr`. +pub struct AggregatePhysicalExpressions { + /// Aggregate function arguments + pub args: Vec>, + /// Order by expressions + pub order_by_exprs: Vec>, +} diff --git a/datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct.rs similarity index 100% rename from datafusion/physical-expr-common/src/aggregate/count_distinct/mod.rs rename to datafusion/functions-aggregate-common/src/aggregate/count_distinct.rs diff --git a/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs similarity index 95% rename from datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs rename to datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs index 360d64ce01419..ee61128979e10 100644 --- a/datafusion/physical-expr-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs @@ -17,13 +17,13 @@ //! 
[`BytesDistinctCountAccumulator`] for Utf8/LargeUtf8/Binary/LargeBinary values -use crate::binary_map::{ArrowBytesSet, OutputType}; -use crate::binary_view_map::ArrowBytesViewSet; use arrow::array::{ArrayRef, OffsetSizeTrait}; use datafusion_common::cast::as_list_array; use datafusion_common::utils::array_into_list_array_nullable; use datafusion_common::ScalarValue; -use datafusion_expr::Accumulator; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; +use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewSet; use std::fmt::Debug; use std::sync::Arc; diff --git a/datafusion/physical-expr-common/src/aggregate/count_distinct/native.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs similarity index 98% rename from datafusion/physical-expr-common/src/aggregate/count_distinct/native.rs rename to datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs index e525118b9a177..d128a8af58eef 100644 --- a/datafusion/physical-expr-common/src/aggregate/count_distinct/native.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs @@ -35,9 +35,9 @@ use datafusion_common::cast::{as_list_array, as_primitive_array}; use datafusion_common::utils::array_into_list_array_nullable; use datafusion_common::utils::memory::estimate_memory_size; use datafusion_common::ScalarValue; -use datafusion_expr::Accumulator; +use datafusion_expr_common::accumulator::Accumulator; -use crate::aggregate::utils::Hashable; +use crate::utils::Hashable; #[derive(Debug)] pub struct PrimitiveDistinctCountAccumulator diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs similarity index 97% rename from datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs rename to datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 592c130b69d89..644221edd04db 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -15,19 +15,24 @@ // specific language governing permissions and limitations // under the License. +//! Utilities for implementing GroupsAccumulator //! 
Adapter that makes [`GroupsAccumulator`] out of [`Accumulator`] +pub mod accumulate; +pub mod bool_op; +pub mod prim_op; + use arrow::{ - array::{AsArray, UInt32Builder}, + array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray, UInt32Builder}, compute, datatypes::UInt32Type, }; -use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray}; use datafusion_common::{ arrow_datafusion_err, utils::get_arrayref_at_indices, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::{Accumulator, EmitTo, GroupsAccumulator}; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; /// An adapter that implements [`GroupsAccumulator`] for any [`Accumulator`] /// diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs similarity index 99% rename from datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs rename to datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs index 3fcd570f514e5..455fc5fec450d 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs @@ -17,13 +17,13 @@ //! [`GroupsAccumulator`] helpers: [`NullState`] and [`accumulate_indices`] //! -//! [`GroupsAccumulator`]: datafusion_expr::GroupsAccumulator +//! [`GroupsAccumulator`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator use arrow::array::{Array, BooleanArray, BooleanBufferBuilder, PrimitiveArray}; use arrow::buffer::{BooleanBuffer, NullBuffer}; use arrow::datatypes::ArrowPrimitiveType; -use datafusion_expr::EmitTo; +use datafusion_expr_common::groups_accumulator::EmitTo; /// Track the accumulator null state per row: if any values for that /// group were null and if any values have been seen at all for that group. /// @@ -48,7 +48,7 @@ use datafusion_expr::EmitTo; /// had at least one value to accumulate so they do not need to track /// if they have seen values for a particular group. /// -/// [`GroupsAccumulator`]: datafusion_expr::GroupsAccumulator +/// [`GroupsAccumulator`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator #[derive(Debug)] pub struct NullState { /// Have we seen any non-filtered input values for `group_index`? 
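A conceptual sketch, not the real `NullState` implementation (which, as the imports above show, is built on Arrow boolean buffers): the per-group bookkeeping described in the accumulate.rs comments amounts to remembering which groups have seen at least one non-null, non-filtered value so that untouched groups can be emitted as NULL. All names below are hypothetical.

// Simplified stand-in for illustration only; not the NullState API.
struct SeenValues {
    seen: Vec<bool>,
}

impl SeenValues {
    fn new(num_groups: usize) -> Self {
        Self {
            seen: vec![false; num_groups],
        }
    }

    /// Record that `group_index` received a non-null, non-filtered input value.
    fn mark_seen(&mut self, group_index: usize) {
        self.seen[group_index] = true;
    }

    /// Groups that were never marked produce NULL in the final output.
    fn emits_null(&self, group_index: usize) -> bool {
        !self.seen[group_index]
    }
}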
diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs similarity index 98% rename from datafusion/physical-expr-common/src/aggregate/groups_accumulator/bool_op.rs rename to datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs index 8498d69dd333f..be2b5e48a8db9 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/bool_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::array::{ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use arrow::buffer::BooleanBuffer; use datafusion_common::Result; -use datafusion_expr::{EmitTo, GroupsAccumulator}; +use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; use super::accumulate::NullState; diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs similarity index 98% rename from datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs rename to datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs index acf1ae525c797..b5c6171af37ca 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs @@ -23,7 +23,7 @@ use arrow::compute; use arrow::datatypes::ArrowPrimitiveType; use arrow::datatypes::DataType; use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; -use datafusion_expr::{EmitTo, GroupsAccumulator}; +use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; use super::accumulate::NullState; diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/functions-aggregate-common/src/lib.rs similarity index 59% rename from datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs rename to datafusion/functions-aggregate-common/src/lib.rs index 3c0f3a28fedbc..cc50ff70913b0 100644 --- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs +++ b/datafusion/functions-aggregate-common/src/lib.rs @@ -15,13 +15,20 @@ // specific language governing permissions and limitations // under the License. -mod adapter; -pub use adapter::GroupsAccumulatorAdapter; +//! Common Aggregate functionality for [DataFusion] +//! +//! This crate contains traits and utilities commonly used to implement aggregate functions +//! They are kept in their own crate to avoid physical expressions depending on logical expressions. +//! +//! 
[DataFusion]: -// Backward compatibility -#[allow(unused_imports)] -pub(crate) mod accumulate { - pub use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::NullState; -} +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] -pub use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::NullState; +pub mod accumulator; +pub mod aggregate; +pub mod merge_arrays; +pub mod order; +pub mod stats; +pub mod tdigest; +pub mod utils; diff --git a/datafusion/physical-expr-common/src/aggregate/merge_arrays.rs b/datafusion/functions-aggregate-common/src/merge_arrays.rs similarity index 100% rename from datafusion/physical-expr-common/src/aggregate/merge_arrays.rs rename to datafusion/functions-aggregate-common/src/merge_arrays.rs diff --git a/datafusion/functions-aggregate-common/src/order.rs b/datafusion/functions-aggregate-common/src/order.rs new file mode 100644 index 0000000000000..bfa6e39138f9e --- /dev/null +++ b/datafusion/functions-aggregate-common/src/order.rs @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Represents the sensitivity of an aggregate expression to ordering. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum AggregateOrderSensitivity { + /// Indicates that the aggregate expression is insensitive to ordering. + /// Ordering at the input is not important for the result of the aggregator. + Insensitive, + /// Indicates that the aggregate expression has a hard requirement on ordering. + /// The aggregator can not produce a correct result unless its ordering + /// requirement is satisfied. + HardRequirement, + /// Indicates that ordering is beneficial for the aggregate expression in terms + /// of evaluation efficiency. The aggregator can produce its result efficiently + /// when its required ordering is satisfied; however, it can still produce the + /// correct result (albeit less efficiently) when its required ordering is not met. 
+ Beneficial, +} + +impl AggregateOrderSensitivity { + pub fn is_insensitive(&self) -> bool { + self.eq(&AggregateOrderSensitivity::Insensitive) + } + + pub fn is_beneficial(&self) -> bool { + self.eq(&AggregateOrderSensitivity::Beneficial) + } + + pub fn hard_requires(&self) -> bool { + self.eq(&AggregateOrderSensitivity::HardRequirement) + } +} diff --git a/datafusion/physical-expr-common/src/aggregate/stats.rs b/datafusion/functions-aggregate-common/src/stats.rs similarity index 100% rename from datafusion/physical-expr-common/src/aggregate/stats.rs rename to datafusion/functions-aggregate-common/src/stats.rs diff --git a/datafusion/physical-expr-common/src/aggregate/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs similarity index 100% rename from datafusion/physical-expr-common/src/aggregate/tdigest.rs rename to datafusion/functions-aggregate-common/src/tdigest.rs diff --git a/datafusion/physical-expr-common/src/aggregate/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs similarity index 98% rename from datafusion/physical-expr-common/src/aggregate/utils.rs rename to datafusion/functions-aggregate-common/src/utils.rs index 9e380bd820ff4..7b8ce0397af83 100644 --- a/datafusion/physical-expr-common/src/aggregate/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -29,11 +29,10 @@ use arrow::{ }, }; use datafusion_common::{exec_err, DataFusionError, Result}; -use datafusion_expr::Accumulator; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use crate::sort_expr::PhysicalSortExpr; - -use super::AggregateExpr; +use crate::aggregate::AggregateExpr; /// Downcast a `Box` or `Arc` /// and return the inner trait object as [`Any`] so diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 4f2bd864832e3..636b2e42d236c 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-functions-aggregate" -description = "Aggregate function packages for the DataFusion query engine" +description = "Traits and types for logical plans and expressions for DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] readme = "README.md" version = { workspace = true } @@ -44,6 +44,8 @@ arrow-schema = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } +datafusion-functions-aggregate-common = { workspace = true } +datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } log = { workspace = true } paste = "1.0.14" diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs index 875112ca8d47d..65956cb8a1dea 100644 --- a/datafusion/functions-aggregate/benches/count.rs +++ b/datafusion/functions-aggregate/benches/count.rs @@ -20,25 +20,22 @@ use arrow::datatypes::Int32Type; use arrow::util::bench_util::{create_boolean_array, create_primitive_array}; use arrow_schema::{DataType, Field, Schema}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_common::DFSchema; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::count::Count; +use datafusion_physical_expr::expressions::col; use std::sync::Arc; fn prepare_accumulator() -> Box { let schema = 
Arc::new(Schema::new(vec![Field::new("f", DataType::Int32, true)])); - let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); let accumulator_args = AccumulatorArgs { - data_type: &DataType::Int64, + return_type: &DataType::Int64, schema: &schema, - dfschema: &df_schema, ignore_nulls: false, - sort_exprs: &[], + ordering_req: &[], is_reversed: false, name: "COUNT(f)", is_distinct: false, - input_types: &[DataType::Int32], - input_exprs: &[datafusion_expr::col("f")], + exprs: &[col("f", &schema).unwrap()], }; let count_fn = Count::new(); diff --git a/datafusion/functions-aggregate/benches/sum.rs b/datafusion/functions-aggregate/benches/sum.rs index dfaa93cdeff76..652d447129dc1 100644 --- a/datafusion/functions-aggregate/benches/sum.rs +++ b/datafusion/functions-aggregate/benches/sum.rs @@ -20,25 +20,22 @@ use arrow::datatypes::Int64Type; use arrow::util::bench_util::{create_boolean_array, create_primitive_array}; use arrow_schema::{DataType, Field, Schema}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_common::DFSchema; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::sum::Sum; +use datafusion_physical_expr::expressions::col; use std::sync::Arc; fn prepare_accumulator(data_type: &DataType) -> Box { let schema = Arc::new(Schema::new(vec![Field::new("f", data_type.clone(), true)])); - let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); let accumulator_args = AccumulatorArgs { - data_type, + return_type: data_type, schema: &schema, - dfschema: &df_schema, ignore_nulls: false, - sort_exprs: &[], + ordering_req: &[], is_reversed: false, name: "SUM(f)", is_distinct: false, - input_types: &[data_type.clone()], - input_exprs: &[datafusion_expr::col("f")], + exprs: &[col("f", &schema).unwrap()], }; let sum_fn = Sum::new(); diff --git a/datafusion/functions-aggregate/src/approx_distinct.rs b/datafusion/functions-aggregate/src/approx_distinct.rs index 56ef32e7ebe07..cf8217fe981de 100644 --- a/datafusion/functions-aggregate/src/approx_distinct.rs +++ b/datafusion/functions-aggregate/src/approx_distinct.rs @@ -277,7 +277,9 @@ impl AggregateUDFImpl for ApproxDistinct { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let accumulator: Box = match &acc_args.input_types[0] { + let data_type = acc_args.exprs[0].data_type(acc_args.schema)?; + + let accumulator: Box = match data_type { // TODO u8, i8, u16, i16 shall really be done using bitmap, not HLL // TODO support for boolean (trivial case) // https://github.com/apache/datafusion/issues/1109 diff --git a/datafusion/functions-aggregate/src/approx_median.rs b/datafusion/functions-aggregate/src/approx_median.rs index c386ad89f0fb7..7a7b12432544a 100644 --- a/datafusion/functions-aggregate/src/approx_median.rs +++ b/datafusion/functions-aggregate/src/approx_median.rs @@ -113,7 +113,7 @@ impl AggregateUDFImpl for ApproxMedian { Ok(Box::new(ApproxPercentileAccumulator::new( 0.5_f64, - acc_args.input_types[0].clone(), + acc_args.exprs[0].data_type(acc_args.schema)?, ))) } } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index ffa623c13b0bf..89d827e868592 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -31,7 +31,7 @@ use arrow::{ use arrow_schema::{Field, Schema}; use datafusion_common::{ - downcast_value, 
internal_err, not_impl_err, plan_err, DFSchema, DataFusionError, + downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, ScalarValue, }; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; @@ -41,10 +41,10 @@ use datafusion_expr::{ Accumulator, AggregateUDFImpl, ColumnarValue, Expr, Signature, TypeSignature, Volatility, }; -use datafusion_physical_expr_common::aggregate::tdigest::{ +use datafusion_functions_aggregate_common::tdigest::{ TDigest, TryIntoF64, DEFAULT_MAX_SIZE, }; -use datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; create_func!(ApproxPercentileCont, approx_percentile_cont_udaf); @@ -105,15 +105,16 @@ impl ApproxPercentileCont { pub(crate) fn create_accumulator( &self, args: AccumulatorArgs, - ) -> datafusion_common::Result { - let percentile = validate_input_percentile_expr(&args.input_exprs[1])?; - let tdigest_max_size = if args.input_exprs.len() == 3 { - Some(validate_input_max_size_expr(&args.input_exprs[2])?) + ) -> Result { + let percentile = validate_input_percentile_expr(&args.exprs[1])?; + let tdigest_max_size = if args.exprs.len() == 3 { + Some(validate_input_max_size_expr(&args.exprs[2])?) } else { None }; - let accumulator: ApproxPercentileAccumulator = match &args.input_types[0] { + let data_type = args.exprs[0].data_type(args.schema)?; + let accumulator: ApproxPercentileAccumulator = match data_type { t @ (DataType::UInt8 | DataType::UInt16 | DataType::UInt32 @@ -142,31 +143,30 @@ impl ApproxPercentileCont { } } -fn get_lit_value(expr: &Expr) -> datafusion_common::Result { +fn get_scalar_value(expr: &Arc) -> Result { let empty_schema = Arc::new(Schema::empty()); - let empty_batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); - let dfschema = DFSchema::empty(); - let expr = - limited_convert_logical_expr_to_physical_expr_with_dfschema(expr, &dfschema)?; - let result = expr.evaluate(&empty_batch)?; - match result { - ColumnarValue::Array(_) => Err(DataFusionError::Internal(format!( - "The expr {:?} can't be evaluated to scalar value", - expr - ))), - ColumnarValue::Scalar(scalar_value) => Ok(scalar_value), + let batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); + if let ColumnarValue::Scalar(s) = expr.evaluate(&batch)? { + Ok(s) + } else { + internal_err!("Didn't expect ColumnarValue::Array") } } -fn validate_input_percentile_expr(expr: &Expr) -> datafusion_common::Result { - let lit = get_lit_value(expr)?; - let percentile = match &lit { - ScalarValue::Float32(Some(q)) => *q as f64, - ScalarValue::Float64(Some(q)) => *q, - got => return not_impl_err!( - "Percentile value for 'APPROX_PERCENTILE_CONT' must be Float32 or Float64 literal (got data type {})", - got.data_type() - ) +fn validate_input_percentile_expr(expr: &Arc) -> Result { + let percentile = match get_scalar_value(expr)? { + ScalarValue::Float32(Some(value)) => { + value as f64 + } + ScalarValue::Float64(Some(value)) => { + value + } + sv => { + return not_impl_err!( + "Percentile value for 'APPROX_PERCENTILE_CONT' must be Float32 or Float64 literal (got data type {})", + sv.data_type() + ) + } }; // Ensure the percentile is between 0 and 1. 
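A hedged usage sketch of the pattern `get_scalar_value` relies on above: constant arguments now arrive as physical `Literal` expressions, and evaluating such an expression against an empty batch yields its `ScalarValue`. The crate paths follow the imports in this patch; the helper name is made up for illustration.

use std::sync::Arc;

use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;

/// Evaluate a constant physical expression (e.g. the percentile literal) to a scalar.
fn constant_argument(expr: &Arc<dyn PhysicalExpr>) -> Result<ScalarValue> {
    // A literal needs no input columns, so an empty batch is enough.
    let batch = RecordBatch::new_empty(Arc::new(Schema::empty()));
    match expr.evaluate(&batch)? {
        ColumnarValue::Scalar(value) => Ok(value),
        ColumnarValue::Array(_) => internal_err!("expected a scalar argument, not an array"),
    }
}

For instance, an expression built as `Arc::new(Literal::new(ScalarValue::Float64(Some(0.5))))` (from `datafusion_physical_expr::expressions`) would evaluate to `Float64(0.5)` here.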
@@ -178,22 +178,24 @@ fn validate_input_percentile_expr(expr: &Expr) -> datafusion_common::Result Ok(percentile) } -fn validate_input_max_size_expr(expr: &Expr) -> datafusion_common::Result { - let lit = get_lit_value(expr)?; - let max_size = match &lit { - ScalarValue::UInt8(Some(q)) => *q as usize, - ScalarValue::UInt16(Some(q)) => *q as usize, - ScalarValue::UInt32(Some(q)) => *q as usize, - ScalarValue::UInt64(Some(q)) => *q as usize, - ScalarValue::Int32(Some(q)) if *q > 0 => *q as usize, - ScalarValue::Int64(Some(q)) if *q > 0 => *q as usize, - ScalarValue::Int16(Some(q)) if *q > 0 => *q as usize, - ScalarValue::Int8(Some(q)) if *q > 0 => *q as usize, - got => return not_impl_err!( - "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", - got.data_type() - ) +fn validate_input_max_size_expr(expr: &Arc) -> Result { + let max_size = match get_scalar_value(expr)? { + ScalarValue::UInt8(Some(q)) => q as usize, + ScalarValue::UInt16(Some(q)) => q as usize, + ScalarValue::UInt32(Some(q)) => q as usize, + ScalarValue::UInt64(Some(q)) => q as usize, + ScalarValue::Int32(Some(q)) if q > 0 => q as usize, + ScalarValue::Int64(Some(q)) if q > 0 => q as usize, + ScalarValue::Int16(Some(q)) if q > 0 => q as usize, + ScalarValue::Int8(Some(q)) if q > 0 => q as usize, + sv => { + return not_impl_err!( + "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", + sv.data_type() + ) + } }; + Ok(max_size) } @@ -205,10 +207,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { #[allow(rustdoc::private_intra_doc_links)] /// See [`TDigest::to_scalar_state()`] for a description of the serialised /// state. - fn state_fields( - &self, - args: StateFieldsArgs, - ) -> datafusion_common::Result> { + fn state_fields(&self, args: StateFieldsArgs) -> Result> { Ok(vec![ Field::new( format_state_name(args.name, "max_size"), @@ -252,14 +251,11 @@ impl AggregateUDFImpl for ApproxPercentileCont { } #[inline] - fn accumulator( - &self, - acc_args: AccumulatorArgs, - ) -> datafusion_common::Result> { + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { Ok(Box::new(self.create_accumulator(acc_args)?)) } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { if !arg_types[0].is_numeric() { return plan_err!("approx_percentile_cont requires numeric input types"); } @@ -307,7 +303,7 @@ impl ApproxPercentileAccumulator { } // public for approx_percentile_cont_with_weight - pub fn convert_to_float(values: &ArrayRef) -> datafusion_common::Result> { + pub fn convert_to_float(values: &ArrayRef) -> Result> { match values.data_type() { DataType::Float64 => { let array = downcast_value!(values, Float64Array); @@ -315,7 +311,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::Float32 => { let array = downcast_value!(values, Float32Array); @@ -323,7 +319,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::Int64 => { let array = downcast_value!(values, Int64Array); @@ -331,7 +327,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) 
} DataType::Int32 => { let array = downcast_value!(values, Int32Array); @@ -339,7 +335,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::Int16 => { let array = downcast_value!(values, Int16Array); @@ -347,7 +343,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::Int8 => { let array = downcast_value!(values, Int8Array); @@ -355,7 +351,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::UInt64 => { let array = downcast_value!(values, UInt64Array); @@ -363,7 +359,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::UInt32 => { let array = downcast_value!(values, UInt32Array); @@ -371,7 +367,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::UInt16 => { let array = downcast_value!(values, UInt16Array); @@ -379,7 +375,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } DataType::UInt8 => { let array = downcast_value!(values, UInt8Array); @@ -387,7 +383,7 @@ impl ApproxPercentileAccumulator { .values() .iter() .filter_map(|v| v.try_as_f64().transpose()) - .collect::>>()?) + .collect::>>()?) } e => internal_err!( "APPROX_PERCENTILE_CONT is not expected to receive the type {e:?}" @@ -397,11 +393,11 @@ impl ApproxPercentileAccumulator { } impl Accumulator for ApproxPercentileAccumulator { - fn state(&mut self) -> datafusion_common::Result> { + fn state(&mut self) -> Result> { Ok(self.digest.to_scalar_state().into_iter().collect()) } - fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { // Remove any nulls before computing the percentile let mut values = Arc::clone(&values[0]); if values.nulls().is_some() { @@ -413,7 +409,7 @@ impl Accumulator for ApproxPercentileAccumulator { Ok(()) } - fn evaluate(&mut self) -> datafusion_common::Result { + fn evaluate(&mut self) -> Result { if self.digest.count() == 0 { return ScalarValue::try_from(self.return_type.clone()); } @@ -436,7 +432,7 @@ impl Accumulator for ApproxPercentileAccumulator { }) } - fn merge_batch(&mut self, states: &[ArrayRef]) -> datafusion_common::Result<()> { + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { if states.is_empty() { return Ok(()); } @@ -446,10 +442,10 @@ impl Accumulator for ApproxPercentileAccumulator { states .iter() .map(|array| ScalarValue::try_from_array(array, index)) - .collect::>>() + .collect::>>() .map(|state| TDigest::from_scalar_state(&state)) }) - .collect::>>()?; + .collect::>>()?; self.merge_digests(&states); @@ -472,7 +468,7 @@ impl Accumulator for ApproxPercentileAccumulator { mod tests { use arrow_schema::DataType; - use datafusion_physical_expr_common::aggregate::tdigest::TDigest; + use datafusion_functions_aggregate_common::tdigest::TDigest; use crate::approx_percentile_cont::ApproxPercentileAccumulator; diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 0dbea1fb1ff79..fee67ba1623db 100644 --- 
a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -17,6 +17,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::sync::Arc; use arrow::{ array::ArrayRef, @@ -29,7 +30,7 @@ use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::Volatility::Immutable; use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, TypeSignature}; -use datafusion_physical_expr_common::aggregate::tdigest::{ +use datafusion_functions_aggregate_common::tdigest::{ Centroid, TDigest, DEFAULT_MAX_SIZE, }; @@ -123,16 +124,16 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { ); } - if acc_args.input_exprs.len() != 3 { + if acc_args.exprs.len() != 3 { return plan_err!( "approx_percentile_cont_with_weight requires three arguments: value, weight, percentile" ); } let sub_args = AccumulatorArgs { - input_exprs: &[ - acc_args.input_exprs[0].clone(), - acc_args.input_exprs[2].clone(), + exprs: &[ + Arc::clone(&acc_args.exprs[0]), + Arc::clone(&acc_args.exprs[2]), ], ..acc_args }; diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 36c9d6a0d7c81..b641d388a7c52 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -29,12 +29,9 @@ use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::AggregateUDFImpl; use datafusion_expr::{Accumulator, Signature, Volatility}; -use datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; -use datafusion_physical_expr_common::aggregate::utils::ordering_fields; -use datafusion_physical_expr_common::sort_expr::{ - limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, - PhysicalSortExpr, -}; +use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays; +use datafusion_functions_aggregate_common::utils::ordering_fields; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use std::collections::{HashSet, VecDeque}; use std::sync::Arc; @@ -117,32 +114,26 @@ impl AggregateUDFImpl for ArrayAgg { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + let data_type = acc_args.exprs[0].data_type(acc_args.schema)?; + if acc_args.is_distinct { - return Ok(Box::new(DistinctArrayAggAccumulator::try_new( - &acc_args.input_types[0], - )?)); + return Ok(Box::new(DistinctArrayAggAccumulator::try_new(&data_type)?)); } - if acc_args.sort_exprs.is_empty() { - return Ok(Box::new(ArrayAggAccumulator::try_new( - &acc_args.input_types[0], - )?)); + if acc_args.ordering_req.is_empty() { + return Ok(Box::new(ArrayAggAccumulator::try_new(&data_type)?)); } - let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( - acc_args.sort_exprs, - acc_args.dfschema, - )?; - - let ordering_dtypes = ordering_req + let ordering_dtypes = acc_args + .ordering_req .iter() .map(|e| e.expr.data_type(acc_args.schema)) .collect::>>()?; OrderSensitiveArrayAggAccumulator::try_new( - &acc_args.input_types[0], + &data_type, &ordering_dtypes, - ordering_req, + acc_args.ordering_req.to_vec(), acc_args.is_reversed, ) .map(|acc| Box::new(acc) as _) diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index 288e0b09f8092..1be3cd6b07146 100644 --- 
a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -34,8 +34,8 @@ use datafusion_expr::Volatility::Immutable; use datafusion_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; -use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::NullState; -use datafusion_physical_expr_common::aggregate::utils::DecimalAverager; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; +use datafusion_functions_aggregate_common::utils::DecimalAverager; use log::debug; use std::any::Any; use std::fmt::Debug; @@ -92,8 +92,10 @@ impl AggregateUDFImpl for Avg { return exec_err!("avg(DISTINCT) aggregations are not available"); } use DataType::*; + + let data_type = acc_args.exprs[0].data_type(acc_args.schema)?; // instantiate specialized accumulator based for the type - match (&acc_args.input_types[0], acc_args.data_type) { + match (&data_type, acc_args.return_type) { (Float64, Float64) => Ok(Box::::default()), ( Decimal128(sum_precision, sum_scale), @@ -120,8 +122,8 @@ impl AggregateUDFImpl for Avg { })), _ => exec_err!( "AvgAccumulator for ({} --> {})", - &acc_args.input_types[0], - acc_args.data_type + &data_type, + acc_args.return_type ), } } @@ -143,7 +145,7 @@ impl AggregateUDFImpl for Avg { fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { matches!( - args.data_type, + args.return_type, DataType::Float64 | DataType::Decimal128(_, _) ) } @@ -153,12 +155,14 @@ impl AggregateUDFImpl for Avg { args: AccumulatorArgs, ) -> Result> { use DataType::*; + + let data_type = args.exprs[0].data_type(args.schema)?; // instantiate specialized accumulator based for the type - match (&args.input_types[0], args.data_type) { + match (&data_type, args.return_type) { (Float64, Float64) => { Ok(Box::new(AvgGroupsAccumulator::::new( - &args.input_types[0], - args.data_type, + &data_type, + args.return_type, |sum: f64, count: u64| Ok(sum / count as f64), ))) } @@ -176,8 +180,8 @@ impl AggregateUDFImpl for Avg { move |sum: i128, count: u64| decimal_averager.avg(sum, count as i128); Ok(Box::new(AvgGroupsAccumulator::::new( - &args.input_types[0], - args.data_type, + &data_type, + args.return_type, avg_fn, ))) } @@ -197,16 +201,16 @@ impl AggregateUDFImpl for Avg { }; Ok(Box::new(AvgGroupsAccumulator::::new( - &args.input_types[0], - args.data_type, + &data_type, + args.return_type, avg_fn, ))) } _ => not_impl_err!( "AvgGroupsAccumulator for ({} --> {})", - &args.input_types[0], - args.data_type + &data_type, + args.return_type ), } } diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs index f6dd0bc20a831..aa65062e3330c 100644 --- a/datafusion/functions-aggregate/src/bit_and_or_xor.rs +++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs @@ -38,7 +38,7 @@ use datafusion_expr::{ Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; /// This macro helps create group accumulators based on bitwise operations typically used internally @@ -84,7 +84,7 @@ macro_rules! accumulator_helper { /// `is_distinct` is boolean value indicating whether the operation is distinct or not. 
macro_rules! downcast_bitwise_accumulator { ($args:ident, $opr:expr, $is_distinct: expr) => { - match $args.data_type { + match $args.return_type { DataType::Int8 => accumulator_helper!(Int8Type, $opr, $is_distinct), DataType::Int16 => accumulator_helper!(Int16Type, $opr, $is_distinct), DataType::Int32 => accumulator_helper!(Int32Type, $opr, $is_distinct), @@ -98,7 +98,7 @@ macro_rules! downcast_bitwise_accumulator { "{} not supported for {}: {}", stringify!($opr), $args.name, - $args.data_type + $args.return_type ) } } @@ -224,7 +224,7 @@ impl AggregateUDFImpl for BitwiseOperation { &self, args: AccumulatorArgs, ) -> Result> { - let data_type = args.data_type; + let data_type = args.return_type; let operation = &self.operation; downcast_integer! { data_type => (group_accumulator_helper, data_type, operation), diff --git a/datafusion/functions-aggregate/src/bool_and_or.rs b/datafusion/functions-aggregate/src/bool_and_or.rs index b91fbb9ff7095..b993b2a4979c8 100644 --- a/datafusion/functions-aggregate/src/bool_and_or.rs +++ b/datafusion/functions-aggregate/src/bool_and_or.rs @@ -35,7 +35,7 @@ use datafusion_expr::{ Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator; // returns the new value after bool_and/bool_or with the new values, taking nullability into account macro_rules! typed_bool_and_or_batch { @@ -149,14 +149,14 @@ impl AggregateUDFImpl for BoolAnd { &self, args: AccumulatorArgs, ) -> Result> { - match args.data_type { + match args.return_type { DataType::Boolean => { Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x && y))) } _ => not_impl_err!( "GroupsAccumulator not supported for {} with {}", args.name, - args.data_type + args.return_type ), } } @@ -269,14 +269,14 @@ impl AggregateUDFImpl for BoolOr { &self, args: AccumulatorArgs, ) -> Result> { - match args.data_type { + match args.return_type { DataType::Boolean => { Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x || y))) } _ => not_impl_err!( "GroupsAccumulator not supported for {} with {}", args.name, - args.data_type + args.return_type ), } } diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index c2d7a89081d66..88f01b06d2d9b 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -36,7 +36,7 @@ use datafusion_expr::{ utils::format_state_name, Accumulator, AggregateUDFImpl, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::stats::StatsType; +use datafusion_functions_aggregate_common::stats::StatsType; make_udaf_expr_and_func!( Correlation, diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index aea05442536ee..04b1921c7b9e5 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -16,7 +16,7 @@ // under the License. 
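Two recurring migrations in the hunks around here, shown as a minimal hedged sketch rather than patch code: (1) an argument's data type is recovered from the physical expressions plus the input schema instead of a removed `input_types` field, and (2) constant arguments are read by downcasting the physical expression to `Literal`, as `string_agg` and `nth_value` do further below. The helper names are hypothetical.

// Sketch built only on the AccumulatorArgs fields introduced earlier in this patch.
use std::sync::Arc;

use arrow_schema::DataType;
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::function::AccumulatorArgs;
use datafusion_physical_expr::expressions::Literal;
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;

/// (1) Same expression as the `acc_args.exprs[0].data_type(acc_args.schema)?` calls in these hunks.
fn first_argument_type(acc_args: &AccumulatorArgs<'_>) -> Result<DataType> {
    acc_args.exprs[0].data_type(acc_args.schema)
}

/// (2) Read a constant string argument, mirroring the `string_agg` delimiter handling.
fn literal_utf8(expr: &Arc<dyn PhysicalExpr>) -> Option<String> {
    let lit = expr.as_any().downcast_ref::<Literal>()?;
    match lit.value() {
        ScalarValue::Utf8(Some(s)) | ScalarValue::LargeUtf8(Some(s)) => Some(s.clone()),
        _ => None,
    }
}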
use ahash::RandomState; -use datafusion_physical_expr_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator; +use datafusion_functions_aggregate_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator; use std::collections::HashSet; use std::ops::BitAnd; use std::{fmt::Debug, sync::Arc}; @@ -47,14 +47,12 @@ use datafusion_expr::{ EmitTo, GroupsAccumulator, Signature, Volatility, }; use datafusion_expr::{Expr, ReversedUDAF, TypeSignature}; -use datafusion_physical_expr_common::aggregate::groups_accumulator::accumulate::accumulate_indices; -use datafusion_physical_expr_common::{ - aggregate::count_distinct::{ - BytesDistinctCountAccumulator, FloatDistinctCountAccumulator, - PrimitiveDistinctCountAccumulator, - }, - binary_map::OutputType, +use datafusion_functions_aggregate_common::aggregate::count_distinct::{ + BytesDistinctCountAccumulator, FloatDistinctCountAccumulator, + PrimitiveDistinctCountAccumulator, }; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate_indices; +use datafusion_physical_expr_common::binary_map::OutputType; make_udaf_expr_and_func!( Count, @@ -145,11 +143,11 @@ impl AggregateUDFImpl for Count { return Ok(Box::new(CountAccumulator::new())); } - if acc_args.input_exprs.len() > 1 { + if acc_args.exprs.len() > 1 { return not_impl_err!("COUNT DISTINCT with multiple arguments"); } - let data_type = &acc_args.input_types[0]; + let data_type = &acc_args.exprs[0].data_type(acc_args.schema)?; Ok(match data_type { // try and use a specialized accumulator if possible, otherwise fall back to generic accumulator DataType::Int8 => Box::new( @@ -271,7 +269,7 @@ impl AggregateUDFImpl for Count { if args.is_distinct { return false; } - args.input_exprs.len() == 1 + args.exprs.len() == 1 } fn create_groups_accumulator( diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs index 6f03b256fd9f7..d0abb079ef155 100644 --- a/datafusion/functions-aggregate/src/covariance.rs +++ b/datafusion/functions-aggregate/src/covariance.rs @@ -35,7 +35,7 @@ use datafusion_expr::{ utils::format_state_name, Accumulator, AggregateUDFImpl, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::stats::StatsType; +use datafusion_functions_aggregate_common::stats::StatsType; make_udaf_expr_and_func!( CovarianceSample, diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index 587767b8e356a..2162442f054ed 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -34,11 +34,8 @@ use datafusion_expr::{ Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Expr, ExprFunctionExt, Signature, TypeSignature, Volatility, }; -use datafusion_physical_expr_common::aggregate::utils::get_sort_options; -use datafusion_physical_expr_common::sort_expr::{ - limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, - PhysicalSortExpr, -}; +use datafusion_functions_aggregate_common::utils::get_sort_options; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; create_func!(FirstValue, first_value_udaf); @@ -117,24 +114,21 @@ impl AggregateUDFImpl for FirstValue { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( - acc_args.sort_exprs, - acc_args.dfschema, - )?; - - let ordering_dtypes = ordering_req + let 
ordering_dtypes = acc_args + .ordering_req .iter() .map(|e| e.expr.data_type(acc_args.schema)) .collect::>>()?; // When requirement is empty, or it is signalled by outside caller that // the ordering requirement is/will be satisfied. - let requirement_satisfied = ordering_req.is_empty() || self.requirement_satisfied; + let requirement_satisfied = + acc_args.ordering_req.is_empty() || self.requirement_satisfied; FirstValueAccumulator::try_new( - acc_args.data_type, + acc_args.return_type, &ordering_dtypes, - ordering_req, + acc_args.ordering_req.to_vec(), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) @@ -416,22 +410,19 @@ impl AggregateUDFImpl for LastValue { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( - acc_args.sort_exprs, - acc_args.dfschema, - )?; - - let ordering_dtypes = ordering_req + let ordering_dtypes = acc_args + .ordering_req .iter() .map(|e| e.expr.data_type(acc_args.schema)) .collect::>>()?; - let requirement_satisfied = ordering_req.is_empty() || self.requirement_satisfied; + let requirement_satisfied = + acc_args.ordering_req.is_empty() || self.requirement_satisfied; LastValueAccumulator::try_new( - acc_args.data_type, + acc_args.return_type, &ordering_dtypes, - ordering_req, + acc_args.ordering_req.to_vec(), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) diff --git a/datafusion/functions-aggregate/src/median.rs b/datafusion/functions-aggregate/src/median.rs index febf1fcd2fefb..7dd0de14c3c0c 100644 --- a/datafusion/functions-aggregate/src/median.rs +++ b/datafusion/functions-aggregate/src/median.rs @@ -38,7 +38,7 @@ use datafusion_expr::{ function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::utils::Hashable; +use datafusion_functions_aggregate_common::utils::Hashable; make_udaf_expr_and_func!( Median, @@ -133,7 +133,7 @@ impl AggregateUDFImpl for Median { }; } - let dt = &acc_args.input_types[0]; + let dt = acc_args.exprs[0].data_type(acc_args.schema)?; downcast_integer! 
{ dt => (helper, dt), DataType::Float16 => helper!(Float16Type, dt), diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index f19d6d767ba11..f9a08631bfb9d 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -49,7 +49,7 @@ use arrow::datatypes::{ }; use arrow_schema::IntervalUnit; use datafusion_common::{downcast_value, internal_err, DataFusionError, Result}; -use datafusion_physical_expr_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use std::fmt::Debug; use arrow::datatypes::i256; @@ -156,7 +156,7 @@ impl AggregateUDFImpl for Max { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - Ok(Box::new(MaxAccumulator::try_new(acc_args.data_type)?)) + Ok(Box::new(MaxAccumulator::try_new(acc_args.return_type)?)) } fn aliases(&self) -> &[String] { @@ -166,7 +166,7 @@ impl AggregateUDFImpl for Max { fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { use DataType::*; matches!( - args.data_type, + args.return_type, Int8 | Int16 | Int32 | Int64 @@ -192,7 +192,7 @@ impl AggregateUDFImpl for Max { ) -> Result> { use DataType::*; use TimeUnit::*; - let data_type = args.data_type; + let data_type = args.return_type; match data_type { Int8 => instantiate_max_accumulator!(data_type, i8, Int8Type), Int16 => instantiate_max_accumulator!(data_type, i16, Int16Type), @@ -253,7 +253,7 @@ impl AggregateUDFImpl for Max { &self, args: AccumulatorArgs, ) -> Result> { - Ok(Box::new(SlidingMaxAccumulator::try_new(args.data_type)?)) + Ok(Box::new(SlidingMaxAccumulator::try_new(args.return_type)?)) } fn is_descending(&self) -> Option { @@ -925,7 +925,7 @@ impl AggregateUDFImpl for Min { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - Ok(Box::new(MinAccumulator::try_new(acc_args.data_type)?)) + Ok(Box::new(MinAccumulator::try_new(acc_args.return_type)?)) } fn aliases(&self) -> &[String] { @@ -935,7 +935,7 @@ impl AggregateUDFImpl for Min { fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { use DataType::*; matches!( - args.data_type, + args.return_type, Int8 | Int16 | Int32 | Int64 @@ -961,7 +961,7 @@ impl AggregateUDFImpl for Min { ) -> Result> { use DataType::*; use TimeUnit::*; - let data_type = args.data_type; + let data_type = args.return_type; match data_type { Int8 => instantiate_min_accumulator!(data_type, i8, Int8Type), Int16 => instantiate_min_accumulator!(data_type, i16, Int16Type), @@ -1022,7 +1022,7 @@ impl AggregateUDFImpl for Min { &self, args: AccumulatorArgs, ) -> Result> { - Ok(Box::new(SlidingMinAccumulator::try_new(args.data_type)?)) + Ok(Box::new(SlidingMinAccumulator::try_new(args.return_type)?)) } fn is_descending(&self) -> Option { diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index dc7c6c86f2130..cb1ddd4738c42 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -30,14 +30,12 @@ use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValu use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Expr, ReversedUDAF, Signature, Volatility, -}; -use 
datafusion_physical_expr_common::aggregate::merge_arrays::merge_ordered_arrays; -use datafusion_physical_expr_common::aggregate::utils::ordering_fields; -use datafusion_physical_expr_common::sort_expr::{ - limited_convert_logical_sort_exprs_to_physical_with_dfschema, LexOrdering, - PhysicalSortExpr, + Accumulator, AggregateUDFImpl, ReversedUDAF, Signature, Volatility, }; +use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays; +use datafusion_functions_aggregate_common::utils::ordering_fields; +use datafusion_physical_expr::expressions::Literal; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; make_udaf_expr_and_func!( NthValueAgg, @@ -87,36 +85,39 @@ impl AggregateUDFImpl for NthValueAgg { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - let n = match acc_args.input_exprs[1] { - Expr::Literal(ScalarValue::Int64(Some(value))) => { + let n = match acc_args.exprs[1] + .as_any() + .downcast_ref::() + .map(|lit| lit.value()) + { + Some(ScalarValue::Int64(Some(value))) => { if acc_args.is_reversed { - Ok(-value) + -*value } else { - Ok(value) + *value } } - _ => not_impl_err!( - "{} not supported for n: {}", - self.name(), - &acc_args.input_exprs[1] - ), - }?; - - let ordering_req = limited_convert_logical_sort_exprs_to_physical_with_dfschema( - acc_args.sort_exprs, - acc_args.dfschema, - )?; + _ => { + return not_impl_err!( + "{} not supported for n: {}", + self.name(), + &acc_args.exprs[1] + ) + } + }; - let ordering_dtypes = ordering_req + let ordering_dtypes = acc_args + .ordering_req .iter() .map(|e| e.expr.data_type(acc_args.schema)) .collect::>>()?; + let data_type = acc_args.exprs[0].data_type(acc_args.schema)?; NthValueAccumulator::try_new( n, - &acc_args.input_types[0], + &data_type, &ordering_dtypes, - ordering_req, + acc_args.ordering_req.to_vec(), ) .map(|acc| Box::new(acc) as _) } diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index df757ddc04226..180f4ad3cf371 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -27,7 +27,7 @@ use datafusion_common::{plan_err, ScalarValue}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; -use datafusion_physical_expr_common::aggregate::stats::StatsType; +use datafusion_functions_aggregate_common::stats::StatsType; use crate::variance::VarianceAccumulator; @@ -269,16 +269,12 @@ impl Accumulator for StddevAccumulator { #[cfg(test)] mod tests { - use std::sync::Arc; - + use super::*; use arrow::{array::*, datatypes::*}; - - use datafusion_common::DFSchema; use datafusion_expr::AggregateUDF; - use datafusion_physical_expr_common::aggregate::utils::get_accum_scalar_values_as_arrays; - use datafusion_physical_expr_common::expressions::column::col; - - use super::*; + use datafusion_functions_aggregate_common::utils::get_accum_scalar_values_as_arrays; + use datafusion_physical_expr::expressions::col; + use std::sync::Arc; #[test] fn stddev_f64_merge_1() -> Result<()> { @@ -325,31 +321,26 @@ mod tests { agg2: Arc, schema: &Schema, ) -> Result { - let dfschema = DFSchema::empty(); let args1 = AccumulatorArgs { - data_type: &DataType::Float64, + return_type: &DataType::Float64, schema, - dfschema: &dfschema, ignore_nulls: false, - sort_exprs: &[], + ordering_req: &[], name: "a", is_distinct: false, is_reversed: false, - 
input_types: &[DataType::Float64], - input_exprs: &[datafusion_expr::col("a")], + exprs: &[col("a", schema)?], }; let args2 = AccumulatorArgs { - data_type: &DataType::Float64, + return_type: &DataType::Float64, schema, - dfschema: &dfschema, ignore_nulls: false, - sort_exprs: &[], + ordering_req: &[], name: "a", is_distinct: false, is_reversed: false, - input_types: &[DataType::Float64], - input_exprs: &[datafusion_expr::col("a")], + exprs: &[col("a", schema)?], }; let mut accum1 = agg1.accumulator(args1)?; diff --git a/datafusion/functions-aggregate/src/string_agg.rs b/datafusion/functions-aggregate/src/string_agg.rs index 5d91a52bc4c65..a7e9a37e23ad6 100644 --- a/datafusion/functions-aggregate/src/string_agg.rs +++ b/datafusion/functions-aggregate/src/string_agg.rs @@ -24,8 +24,9 @@ use datafusion_common::Result; use datafusion_common::{not_impl_err, ScalarValue}; use datafusion_expr::function::AccumulatorArgs; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Expr, Signature, TypeSignature, Volatility, + Accumulator, AggregateUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_physical_expr::expressions::Literal; use std::any::Any; make_udaf_expr_and_func!( @@ -82,21 +83,20 @@ impl AggregateUDFImpl for StringAgg { } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - match &acc_args.input_exprs[1] { - Expr::Literal(ScalarValue::Utf8(Some(delimiter))) - | Expr::Literal(ScalarValue::LargeUtf8(Some(delimiter))) => { - Ok(Box::new(StringAggAccumulator::new(delimiter))) - } - Expr::Literal(ScalarValue::Utf8(None)) - | Expr::Literal(ScalarValue::LargeUtf8(None)) - | Expr::Literal(ScalarValue::Null) => { - Ok(Box::new(StringAggAccumulator::new(""))) - } - _ => not_impl_err!( - "StringAgg not supported for delimiter {}", - &acc_args.input_exprs[1] - ), + if let Some(lit) = acc_args.exprs[1].as_any().downcast_ref::() { + return match lit.value() { + ScalarValue::Utf8(Some(delimiter)) + | ScalarValue::LargeUtf8(Some(delimiter)) => { + Ok(Box::new(StringAggAccumulator::new(delimiter.as_str()))) + } + ScalarValue::Utf8(None) + | ScalarValue::LargeUtf8(None) + | ScalarValue::Null => Ok(Box::new(StringAggAccumulator::new(""))), + e => not_impl_err!("StringAgg not supported for delimiter {}", e), + }; } + + not_impl_err!("expect literal") } } diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 08e3908a58297..7e40c1bd17a8d 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -39,8 +39,8 @@ use datafusion_expr::utils::format_state_name; use datafusion_expr::{ Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; -use datafusion_physical_expr_common::aggregate::utils::Hashable; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; +use datafusion_functions_aggregate_common::utils::Hashable; make_udaf_expr_and_func!( Sum, @@ -58,14 +58,18 @@ make_udaf_expr_and_func!( /// `helper` is a macro accepting (ArrowPrimitiveType, DataType) macro_rules! 
downcast_sum { ($args:ident, $helper:ident) => { - match $args.data_type { - DataType::UInt64 => $helper!(UInt64Type, $args.data_type), - DataType::Int64 => $helper!(Int64Type, $args.data_type), - DataType::Float64 => $helper!(Float64Type, $args.data_type), - DataType::Decimal128(_, _) => $helper!(Decimal128Type, $args.data_type), - DataType::Decimal256(_, _) => $helper!(Decimal256Type, $args.data_type), + match $args.return_type { + DataType::UInt64 => $helper!(UInt64Type, $args.return_type), + DataType::Int64 => $helper!(Int64Type, $args.return_type), + DataType::Float64 => $helper!(Float64Type, $args.return_type), + DataType::Decimal128(_, _) => $helper!(Decimal128Type, $args.return_type), + DataType::Decimal256(_, _) => $helper!(Decimal256Type, $args.return_type), _ => { - not_impl_err!("Sum not supported for {}: {}", $args.name, $args.data_type) + not_impl_err!( + "Sum not supported for {}: {}", + $args.name, + $args.return_type + ) } } }; diff --git a/datafusion/functions-aggregate/src/variance.rs b/datafusion/functions-aggregate/src/variance.rs index c772608cb376d..4c78a42ea494f 100644 --- a/datafusion/functions-aggregate/src/variance.rs +++ b/datafusion/functions-aggregate/src/variance.rs @@ -34,7 +34,7 @@ use datafusion_expr::{ utils::format_state_name, Accumulator, AggregateUDFImpl, Signature, Volatility, }; -use datafusion_physical_expr_common::aggregate::stats::StatsType; +use datafusion_functions_aggregate_common::stats::StatsType; make_udaf_expr_and_func!( VarianceSample, diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index 3ef2d53455339..45ccb08e52e91 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -39,6 +39,6 @@ path = "src/lib.rs" ahash = { workspace = true } arrow = { workspace = true } datafusion-common = { workspace = true, default-features = true } -datafusion-expr = { workspace = true } +datafusion-expr-common = { workspace = true } hashbrown = { workspace = true } rand = { workspace = true } diff --git a/datafusion/physical-expr-common/src/aggregate/mod.rs b/datafusion/physical-expr-common/src/aggregate/mod.rs deleted file mode 100644 index 350023352b12f..0000000000000 --- a/datafusion/physical-expr-common/src/aggregate/mod.rs +++ /dev/null @@ -1,807 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
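
Editorial note (not part of the patch): the hunks above all migrate UDAF implementations to the reworked `AccumulatorArgs`: `data_type` becomes `return_type`, the logical `sort_exprs`/`dfschema` pair is replaced by physical `ordering_req`, and `input_exprs`/`input_types` collapse into physical `exprs`. A minimal sketch of an `accumulator` body under the new layout, assuming the imports shown; `example_accumulator` is a hypothetical name and the literal handling is illustrative only:

```rust
use arrow::datatypes::DataType;
use datafusion_common::{not_impl_err, Result, ScalarValue};
use datafusion_expr::function::AccumulatorArgs;
use datafusion_expr::Accumulator;
use datafusion_physical_expr::expressions::Literal;

fn example_accumulator(acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
    // Output type: `return_type` replaces the old `data_type` field.
    let _return_type: &DataType = acc_args.return_type;

    // Ordering information now arrives as physical sort expressions, so no
    // logical-to-physical conversion (and no `dfschema`) is needed.
    let _ordering_dtypes = acc_args
        .ordering_req
        .iter()
        .map(|e| e.expr.data_type(acc_args.schema))
        .collect::<Result<Vec<_>>>()?;

    // Constant arguments (a delimiter, `n`, ...) are physical `Literal`
    // expressions in `exprs`, recovered by downcasting instead of matching
    // on logical `Expr::Literal`.
    match acc_args.exprs[0].as_any().downcast_ref::<Literal>() {
        Some(lit) if matches!(lit.value(), ScalarValue::Utf8(Some(_))) => {
            not_impl_err!("construct the concrete accumulator from the literal here")
        }
        _ => not_impl_err!("expected a literal argument"),
    }
}
```
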
- -use std::fmt::Debug; -use std::{any::Any, sync::Arc}; - -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - -use datafusion_common::exec_err; -use datafusion_common::{internal_err, not_impl_err, DFSchema, Result}; -use datafusion_expr::expr::create_function_physical_name; -use datafusion_expr::function::StateFieldsArgs; -use datafusion_expr::type_coercion::aggregates::check_arg_count; -use datafusion_expr::utils::AggregateOrderSensitivity; -use datafusion_expr::ReversedUDAF; -use datafusion_expr::{ - function::AccumulatorArgs, Accumulator, AggregateUDF, Expr, GroupsAccumulator, -}; - -use crate::physical_expr::PhysicalExpr; -use crate::sort_expr::{LexOrdering, PhysicalSortExpr}; -use crate::utils::reverse_order_bys; - -use self::utils::down_cast_any_ref; - -pub mod count_distinct; -pub mod groups_accumulator; -pub mod merge_arrays; -pub mod stats; -pub mod tdigest; -pub mod utils; - -/// Creates a physical expression of the UDAF, that includes all necessary type coercion. -/// This function errors when `args`' can't be coerced to a valid argument type of the UDAF. -/// -/// `input_exprs` and `sort_exprs` are used for customizing Accumulator -/// whose behavior depends on arguments such as the `ORDER BY`. -/// -/// For example to call `ARRAY_AGG(x ORDER BY y)` would pass `y` to `sort_exprs`, `x` to `input_exprs` -/// -/// `input_exprs` and `sort_exprs` are used for customizing Accumulator as the arguments in `AccumulatorArgs`, -/// if you don't need them it is fine to pass empty slice `&[]`. -/// -/// `is_reversed` is used to indicate whether the aggregation is running in reverse order, -/// it could be used to hint Accumulator to accumulate in the reversed order, -/// you can just set to false if you are not reversing expression -/// -/// You can also create expression by [`AggregateExprBuilder`] -#[allow(clippy::too_many_arguments)] -pub fn create_aggregate_expr( - fun: &AggregateUDF, - input_phy_exprs: &[Arc], - input_exprs: &[Expr], - sort_exprs: &[Expr], - ordering_req: &[PhysicalSortExpr], - schema: &Schema, - name: Option, - ignore_nulls: bool, - is_distinct: bool, -) -> Result> { - let mut builder = - AggregateExprBuilder::new(Arc::new(fun.clone()), input_phy_exprs.to_vec()); - builder = builder.sort_exprs(sort_exprs.to_vec()); - builder = builder.order_by(ordering_req.to_vec()); - builder = builder.logical_exprs(input_exprs.to_vec()); - builder = builder.schema(Arc::new(schema.clone())); - if let Some(name) = name { - builder = builder.alias(name); - } - - if ignore_nulls { - builder = builder.ignore_nulls(); - } - if is_distinct { - builder = builder.distinct(); - } - - builder.build() -} - -#[allow(clippy::too_many_arguments)] -// This is not for external usage, consider creating with `create_aggregate_expr` instead. 
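
Editorial note (not part of the patch): the removed `create_aggregate_expr` free function above was a thin wrapper over the builder, which this patch relocates to the new `datafusion-physical-expr-functions-aggregate` crate. A sketch of how a call site can be written against the relocated builder; `udaf`, `args`, `ordering_req`, `schema`, and the alias are placeholders:

```rust
use std::sync::Arc;

use arrow::datatypes::Schema;
use datafusion_common::Result;
use datafusion_expr::AggregateUDF;
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder;

fn build_aggregate(
    udaf: Arc<AggregateUDF>,
    args: Vec<Arc<dyn PhysicalExpr>>,
    ordering_req: Vec<PhysicalSortExpr>,
    schema: Schema,
) -> Result<()> {
    // The builder carries what the old positional arguments did; the logical
    // `input_exprs`/`sort_exprs`/`dfschema` parameters no longer exist.
    let _aggregate = AggregateExprBuilder::new(udaf, args)
        .order_by(ordering_req)
        .schema(Arc::new(schema))
        .alias("my_agg")
        .ignore_nulls()
        .distinct()
        .build()?;
    Ok(())
}
```
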
-pub fn create_aggregate_expr_with_dfschema( - fun: &AggregateUDF, - input_phy_exprs: &[Arc], - input_exprs: &[Expr], - sort_exprs: &[Expr], - ordering_req: &[PhysicalSortExpr], - dfschema: &DFSchema, - alias: Option, - ignore_nulls: bool, - is_distinct: bool, - is_reversed: bool, -) -> Result> { - let mut builder = - AggregateExprBuilder::new(Arc::new(fun.clone()), input_phy_exprs.to_vec()); - builder = builder.sort_exprs(sort_exprs.to_vec()); - builder = builder.order_by(ordering_req.to_vec()); - builder = builder.logical_exprs(input_exprs.to_vec()); - builder = builder.dfschema(dfschema.clone()); - let schema: Schema = dfschema.into(); - builder = builder.schema(Arc::new(schema)); - if let Some(alias) = alias { - builder = builder.alias(alias); - } - - if ignore_nulls { - builder = builder.ignore_nulls(); - } - if is_distinct { - builder = builder.distinct(); - } - if is_reversed { - builder = builder.reversed(); - } - - builder.build() -} - -/// Builder for physical [`AggregateExpr`] -/// -/// `AggregateExpr` contains the information necessary to call -/// an aggregate expression. -#[derive(Debug, Clone)] -pub struct AggregateExprBuilder { - fun: Arc, - /// Physical expressions of the aggregate function - args: Vec>, - /// Logical expressions of the aggregate function, it will be deprecated in - logical_args: Vec, - alias: Option, - /// Arrow Schema for the aggregate function - schema: SchemaRef, - /// Datafusion Schema for the aggregate function - dfschema: DFSchema, - /// The logical order by expressions, it will be deprecated in - sort_exprs: Vec, - /// The physical order by expressions - ordering_req: LexOrdering, - /// Whether to ignore null values - ignore_nulls: bool, - /// Whether is distinct aggregate function - is_distinct: bool, - /// Whether the expression is reversed - is_reversed: bool, -} - -impl AggregateExprBuilder { - pub fn new(fun: Arc, args: Vec>) -> Self { - Self { - fun, - args, - logical_args: vec![], - alias: None, - schema: Arc::new(Schema::empty()), - dfschema: DFSchema::empty(), - sort_exprs: vec![], - ordering_req: vec![], - ignore_nulls: false, - is_distinct: false, - is_reversed: false, - } - } - - pub fn build(self) -> Result> { - let Self { - fun, - args, - logical_args, - alias, - schema, - dfschema, - sort_exprs, - ordering_req, - ignore_nulls, - is_distinct, - is_reversed, - } = self; - if args.is_empty() { - return internal_err!("args should not be empty"); - } - - let mut ordering_fields = vec![]; - - debug_assert_eq!(sort_exprs.len(), ordering_req.len()); - if !ordering_req.is_empty() { - let ordering_types = ordering_req - .iter() - .map(|e| e.expr.data_type(&schema)) - .collect::>>()?; - - ordering_fields = utils::ordering_fields(&ordering_req, &ordering_types); - } - - let input_exprs_types = args - .iter() - .map(|arg| arg.data_type(&schema)) - .collect::>>()?; - - check_arg_count( - fun.name(), - &input_exprs_types, - &fun.signature().type_signature, - )?; - - let data_type = fun.return_type(&input_exprs_types)?; - let name = match alias { - None => create_function_physical_name( - fun.name(), - is_distinct, - &logical_args, - if sort_exprs.is_empty() { - None - } else { - Some(&sort_exprs) - }, - )?, - Some(alias) => alias, - }; - - Ok(Arc::new(AggregateFunctionExpr { - fun: Arc::unwrap_or_clone(fun), - args, - logical_args, - data_type, - name, - schema: Arc::unwrap_or_clone(schema), - dfschema, - sort_exprs, - ordering_req, - ignore_nulls, - ordering_fields, - is_distinct, - input_types: input_exprs_types, - is_reversed, - })) - } - - pub 
fn alias(mut self, alias: impl Into) -> Self { - self.alias = Some(alias.into()); - self - } - - pub fn schema(mut self, schema: SchemaRef) -> Self { - self.schema = schema; - self - } - - pub fn dfschema(mut self, dfschema: DFSchema) -> Self { - self.dfschema = dfschema; - self - } - - pub fn order_by(mut self, order_by: LexOrdering) -> Self { - self.ordering_req = order_by; - self - } - - pub fn reversed(mut self) -> Self { - self.is_reversed = true; - self - } - - pub fn with_reversed(mut self, is_reversed: bool) -> Self { - self.is_reversed = is_reversed; - self - } - - pub fn distinct(mut self) -> Self { - self.is_distinct = true; - self - } - - pub fn with_distinct(mut self, is_distinct: bool) -> Self { - self.is_distinct = is_distinct; - self - } - - pub fn ignore_nulls(mut self) -> Self { - self.ignore_nulls = true; - self - } - - pub fn with_ignore_nulls(mut self, ignore_nulls: bool) -> Self { - self.ignore_nulls = ignore_nulls; - self - } - - /// This method will be deprecated in - pub fn sort_exprs(mut self, sort_exprs: Vec) -> Self { - self.sort_exprs = sort_exprs; - self - } - - /// This method will be deprecated in - pub fn logical_exprs(mut self, logical_args: Vec) -> Self { - self.logical_args = logical_args; - self - } -} - -/// An aggregate expression that: -/// * knows its resulting field -/// * knows how to create its accumulator -/// * knows its accumulator's state's field -/// * knows the expressions from whose its accumulator will receive values -/// -/// Any implementation of this trait also needs to implement the -/// `PartialEq` to allows comparing equality between the -/// trait objects. -pub trait AggregateExpr: Send + Sync + Debug + PartialEq { - /// Returns the aggregate expression as [`Any`] so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// the field of the final result of this aggregation. - fn field(&self) -> Result; - - /// the accumulator used to accumulate values from the expressions. - /// the accumulator expects the same number of arguments as `expressions` and must - /// return states with the same description as `state_fields` - fn create_accumulator(&self) -> Result>; - - /// the fields that encapsulate the Accumulator's state - /// the number of fields here equals the number of states that the accumulator contains - fn state_fields(&self) -> Result>; - - /// expressions that are passed to the Accumulator. - /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. - fn expressions(&self) -> Vec>; - - /// Order by requirements for the aggregate function - /// By default it is `None` (there is no requirement) - /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - /// Indicates whether aggregator can produce the correct result with any - /// arbitrary input ordering. By default, we assume that aggregate expressions - /// are order insensitive. - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - AggregateOrderSensitivity::Insensitive - } - - /// Sets the indicator whether ordering requirements of the aggregator is - /// satisfied by its input. If this is not the case, aggregators with order - /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce - /// the correct result with possibly more work internally. - /// - /// # Returns - /// - /// Returns `Ok(Some(updated_expr))` if the process completes successfully. 
- /// If the expression can benefit from existing input ordering, but does - /// not implement the method, returns an error. Order insensitive and hard - /// requirement aggregators return `Ok(None)`. - fn with_beneficial_ordering( - self: Arc, - _requirement_satisfied: bool, - ) -> Result>> { - if self.order_bys().is_some() && self.order_sensitivity().is_beneficial() { - return exec_err!( - "Should implement with satisfied for aggregator :{:?}", - self.name() - ); - } - Ok(None) - } - - /// Human readable name such as `"MIN(c2)"`. The default - /// implementation returns placeholder text. - fn name(&self) -> &str { - "AggregateExpr: default name" - } - - /// If the aggregate expression has a specialized - /// [`GroupsAccumulator`] implementation. If this returns true, - /// `[Self::create_groups_accumulator`] will be called. - fn groups_accumulator_supported(&self) -> bool { - false - } - - /// Return a specialized [`GroupsAccumulator`] that manages state - /// for all groups. - /// - /// For maximum performance, a [`GroupsAccumulator`] should be - /// implemented in addition to [`Accumulator`]. - fn create_groups_accumulator(&self) -> Result> { - not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") - } - - /// Construct an expression that calculates the aggregate in reverse. - /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). - /// For aggregates that do not support calculation in reverse, - /// returns None (which is the default value). - fn reverse_expr(&self) -> Option> { - None - } - - /// Creates accumulator implementation that supports retract - fn create_sliding_accumulator(&self) -> Result> { - not_impl_err!("Retractable Accumulator hasn't been implemented for {self:?} yet") - } - - /// Returns all expressions used in the [`AggregateExpr`]. - /// These expressions are (1)function arguments, (2) order by expressions. - fn all_expressions(&self) -> AggregatePhysicalExpressions { - let args = self.expressions(); - let order_bys = self.order_bys().unwrap_or(&[]); - let order_by_exprs = order_bys - .iter() - .map(|sort_expr| sort_expr.expr.clone()) - .collect::>(); - AggregatePhysicalExpressions { - args, - order_by_exprs, - } - } - - /// Rewrites [`AggregateExpr`], with new expressions given. The argument should be consistent - /// with the return value of the [`AggregateExpr::all_expressions`] method. - /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. - /// TODO: This method only rewrites the [`PhysicalExpr`]s and does not handle [`Expr`]s. - /// This can cause silent bugs and should be fixed in the future (possibly with physical-to-logical - /// conversions). - fn with_new_expressions( - &self, - _args: Vec>, - _order_by_exprs: Vec>, - ) -> Option> { - None - } - - /// If this function is max, return (output_field, true) - /// if the function is min, return (output_field, false) - /// otherwise return None (the default) - /// - /// output_field is the name of the column produced by this aggregate - /// - /// Note: this is used to use special aggregate implementations in certain conditions - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - None - } -} - -/// Stores the physical expressions used inside the `AggregateExpr`. -pub struct AggregatePhysicalExpressions { - /// Aggregate function arguments - pub args: Vec>, - /// Order by expressions - pub order_by_exprs: Vec>, -} - -/// Physical aggregate expression of a UDAF. 
-#[derive(Debug, Clone)] -pub struct AggregateFunctionExpr { - fun: AggregateUDF, - args: Vec>, - logical_args: Vec, - /// Output / return type of this aggregate - data_type: DataType, - name: String, - schema: Schema, - dfschema: DFSchema, - // The logical order by expressions - sort_exprs: Vec, - // The physical order by expressions - ordering_req: LexOrdering, - // Whether to ignore null values - ignore_nulls: bool, - // fields used for order sensitive aggregation functions - ordering_fields: Vec, - is_distinct: bool, - is_reversed: bool, - input_types: Vec, -} - -impl AggregateFunctionExpr { - /// Return the `AggregateUDF` used by this `AggregateFunctionExpr` - pub fn fun(&self) -> &AggregateUDF { - &self.fun - } - - /// Return if the aggregation is distinct - pub fn is_distinct(&self) -> bool { - self.is_distinct - } - - /// Return if the aggregation ignores nulls - pub fn ignore_nulls(&self) -> bool { - self.ignore_nulls - } - - /// Return if the aggregation is reversed - pub fn is_reversed(&self) -> bool { - self.is_reversed - } -} - -impl AggregateExpr for AggregateFunctionExpr { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - fn expressions(&self) -> Vec> { - self.args.clone() - } - - fn state_fields(&self) -> Result> { - let args = StateFieldsArgs { - name: &self.name, - input_types: &self.input_types, - return_type: &self.data_type, - ordering_fields: &self.ordering_fields, - is_distinct: self.is_distinct, - }; - - self.fun.state_fields(args) - } - - fn field(&self) -> Result { - Ok(Field::new(&self.name, self.data_type.clone(), true)) - } - - fn create_accumulator(&self) -> Result> { - let acc_args = AccumulatorArgs { - data_type: &self.data_type, - schema: &self.schema, - dfschema: &self.dfschema, - ignore_nulls: self.ignore_nulls, - sort_exprs: &self.sort_exprs, - is_distinct: self.is_distinct, - input_types: &self.input_types, - input_exprs: &self.logical_args, - name: &self.name, - is_reversed: self.is_reversed, - }; - - self.fun.accumulator(acc_args) - } - - fn create_sliding_accumulator(&self) -> Result> { - let args = AccumulatorArgs { - data_type: &self.data_type, - schema: &self.schema, - dfschema: &self.dfschema, - ignore_nulls: self.ignore_nulls, - sort_exprs: &self.sort_exprs, - is_distinct: self.is_distinct, - input_types: &self.input_types, - input_exprs: &self.logical_args, - name: &self.name, - is_reversed: self.is_reversed, - }; - - let accumulator = self.fun.create_sliding_accumulator(args)?; - - // Accumulators that have window frame startings different - // than `UNBOUNDED PRECEDING`, such as `1 PRECEDING`, need to - // implement retract_batch method in order to run correctly - // currently in DataFusion. - // - // If this `retract_batches` is not present, there is no way - // to calculate result correctly. For example, the query - // - // ```sql - // SELECT - // SUM(a) OVER(ORDER BY a ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS sum_a - // FROM - // t - // ``` - // - // 1. First sum value will be the sum of rows between `[0, 1)`, - // - // 2. Second sum value will be the sum of rows between `[0, 2)` - // - // 3. Third sum value will be the sum of rows between `[1, 3)`, etc. - // - // Since the accumulator keeps the running sum: - // - // 1. First sum we add to the state sum value between `[0, 1)` - // - // 2. Second sum we add to the state sum value between `[1, 2)` - // (`[0, 1)` is already in the state sum, hence running sum will - // cover `[0, 2)` range) - // - // 3. 
Third sum we add to the state sum value between `[2, 3)` - // (`[0, 2)` is already in the state sum). Also we need to - // retract values between `[0, 1)` by this way we can obtain sum - // between [1, 3) which is indeed the appropriate range. - // - // When we use `UNBOUNDED PRECEDING` in the query starting - // index will always be 0 for the desired range, and hence the - // `retract_batch` method will not be called. In this case - // having retract_batch is not a requirement. - // - // This approach is a a bit different than window function - // approach. In window function (when they use a window frame) - // they get all the desired range during evaluation. - if !accumulator.supports_retract_batch() { - return not_impl_err!( - "Aggregate can not be used as a sliding accumulator because \ - `retract_batch` is not implemented: {}", - self.name - ); - } - Ok(accumulator) - } - - fn name(&self) -> &str { - &self.name - } - - fn groups_accumulator_supported(&self) -> bool { - let args = AccumulatorArgs { - data_type: &self.data_type, - schema: &self.schema, - dfschema: &self.dfschema, - ignore_nulls: self.ignore_nulls, - sort_exprs: &self.sort_exprs, - is_distinct: self.is_distinct, - input_types: &self.input_types, - input_exprs: &self.logical_args, - name: &self.name, - is_reversed: self.is_reversed, - }; - self.fun.groups_accumulator_supported(args) - } - - fn create_groups_accumulator(&self) -> Result> { - let args = AccumulatorArgs { - data_type: &self.data_type, - schema: &self.schema, - dfschema: &self.dfschema, - ignore_nulls: self.ignore_nulls, - sort_exprs: &self.sort_exprs, - is_distinct: self.is_distinct, - input_types: &self.input_types, - input_exprs: &self.logical_args, - name: &self.name, - is_reversed: self.is_reversed, - }; - self.fun.create_groups_accumulator(args) - } - - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - if self.ordering_req.is_empty() { - return None; - } - - if !self.order_sensitivity().is_insensitive() { - return Some(&self.ordering_req); - } - - None - } - - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - if !self.ordering_req.is_empty() { - // If there is requirement, use the sensitivity of the implementation - self.fun.order_sensitivity() - } else { - // If no requirement, aggregator is order insensitive - AggregateOrderSensitivity::Insensitive - } - } - - fn with_beneficial_ordering( - self: Arc, - beneficial_ordering: bool, - ) -> Result>> { - let Some(updated_fn) = self - .fun - .clone() - .with_beneficial_ordering(beneficial_ordering)? - else { - return Ok(None); - }; - create_aggregate_expr_with_dfschema( - &updated_fn, - &self.args, - &self.logical_args, - &self.sort_exprs, - &self.ordering_req, - &self.dfschema, - Some(self.name().to_string()), - self.ignore_nulls, - self.is_distinct, - self.is_reversed, - ) - .map(Some) - } - - fn reverse_expr(&self) -> Option> { - match self.fun.reverse_udf() { - ReversedUDAF::NotSupported => None, - ReversedUDAF::Identical => Some(Arc::new(self.clone())), - ReversedUDAF::Reversed(reverse_udf) => { - let reverse_ordering_req = reverse_order_bys(&self.ordering_req); - let reverse_sort_exprs = self - .sort_exprs - .iter() - .map(|e| { - if let Expr::Sort(s) = e { - Expr::Sort(s.reverse()) - } else { - // Expects to receive `Expr::Sort`. - unreachable!() - } - }) - .collect::>(); - let mut name = self.name().to_string(); - // If the function is changed, we need to reverse order_by clause as well - // i.e. 
First(a order by b asc null first) -> Last(a order by b desc null last) - if self.fun().name() == reverse_udf.name() { - } else { - replace_order_by_clause(&mut name); - } - replace_fn_name_clause(&mut name, self.fun.name(), reverse_udf.name()); - let reverse_aggr = create_aggregate_expr_with_dfschema( - &reverse_udf, - &self.args, - &self.logical_args, - &reverse_sort_exprs, - &reverse_ordering_req, - &self.dfschema, - Some(name), - self.ignore_nulls, - self.is_distinct, - !self.is_reversed, - ) - .unwrap(); - - Some(reverse_aggr) - } - } - } - - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - self.fun - .is_descending() - .and_then(|flag| self.field().ok().map(|f| (f, flag))) - } -} - -impl PartialEq for AggregateFunctionExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.data_type == x.data_type - && self.fun == x.fun - && self.args.len() == x.args.len() - && self - .args - .iter() - .zip(x.args.iter()) - .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) - }) - .unwrap_or(false) - } -} - -fn replace_order_by_clause(order_by: &mut String) { - let suffixes = [ - (" DESC NULLS FIRST]", " ASC NULLS LAST]"), - (" ASC NULLS FIRST]", " DESC NULLS LAST]"), - (" DESC NULLS LAST]", " ASC NULLS FIRST]"), - (" ASC NULLS LAST]", " DESC NULLS FIRST]"), - ]; - - if let Some(start) = order_by.find("ORDER BY [") { - if let Some(end) = order_by[start..].find(']') { - let order_by_start = start + 9; - let order_by_end = start + end; - - let column_order = &order_by[order_by_start..=order_by_end]; - for (suffix, replacement) in suffixes { - if column_order.ends_with(suffix) { - let new_order = column_order.replace(suffix, replacement); - order_by.replace_range(order_by_start..=order_by_end, &new_order); - break; - } - } - } - } -} - -fn replace_fn_name_clause(aggr_name: &mut String, fn_name_old: &str, fn_name_new: &str) { - *aggr_name = aggr_name.replace(fn_name_old, fn_name_new); -} diff --git a/datafusion/physical-expr-common/src/binary_map.rs b/datafusion/physical-expr-common/src/binary_map.rs index edf608a2054f3..d21bdb3434c45 100644 --- a/datafusion/physical-expr-common/src/binary_map.rs +++ b/datafusion/physical-expr-common/src/binary_map.rs @@ -60,7 +60,7 @@ impl ArrowBytesSet { /// Return the contents of this set and replace it with a new empty /// set with the same output type - pub(super) fn take(&mut self) -> Self { + pub fn take(&mut self) -> Self { Self(self.0.take()) } diff --git a/datafusion/physical-expr-common/src/datum.rs b/datafusion/physical-expr-common/src/datum.rs index d0ba5f113b6fa..96c08d0d3a5b5 100644 --- a/datafusion/physical-expr-common/src/datum.rs +++ b/datafusion/physical-expr-common/src/datum.rs @@ -22,7 +22,8 @@ use arrow::compute::SortOptions; use arrow::error::ArrowError; use datafusion_common::internal_err; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::{ColumnarValue, Operator}; +use datafusion_expr_common::columnar_value::ColumnarValue; +use datafusion_expr_common::operator::Operator; use std::sync::Arc; /// Applies a binary [`Datum`] kernel `f` to `lhs` and `rhs` diff --git a/datafusion/physical-expr-common/src/expressions/mod.rs b/datafusion/physical-expr-common/src/expressions/mod.rs deleted file mode 100644 index dd534cc07d20e..0000000000000 --- a/datafusion/physical-expr-common/src/expressions/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -mod cast; -pub mod column; -pub mod literal; - -pub use cast::{cast, cast_with_options, CastExpr}; -pub use literal::{lit, Literal}; diff --git a/datafusion/physical-expr-common/src/lib.rs b/datafusion/physical-expr-common/src/lib.rs index f03eedd4cf65c..7e2ea0c49397f 100644 --- a/datafusion/physical-expr-common/src/lib.rs +++ b/datafusion/physical-expr-common/src/lib.rs @@ -15,11 +15,14 @@ // specific language governing permissions and limitations // under the License. -pub mod aggregate; +//! Physical Expr Common packages for [DataFusion] +//! This package contains high level PhysicalExpr trait +//! +//! [DataFusion]: + pub mod binary_map; pub mod binary_view_map; pub mod datum; -pub mod expressions; pub mod physical_expr; pub mod sort_expr; pub mod tree_node; diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index e62606a42e6fb..75d300dd0107f 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -20,18 +20,16 @@ use std::fmt::{Debug, Display}; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use crate::expressions::column::Column; use crate::utils::scatter; use arrow::array::BooleanArray; use arrow::compute::filter_record_batch; -use arrow::datatypes::{DataType, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{internal_err, not_impl_err, plan_err, Result}; -use datafusion_expr::interval_arithmetic::Interval; -use datafusion_expr::sort_properties::ExprProperties; -use datafusion_expr::ColumnarValue; +use datafusion_common::{internal_err, not_impl_err, Result}; +use datafusion_expr_common::columnar_value::ColumnarValue; +use datafusion_expr_common::interval_arithmetic::Interval; +use datafusion_expr_common::sort_properties::ExprProperties; /// See [create_physical_expr](https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html) /// for examples of creating `PhysicalExpr` from `Expr` @@ -193,33 +191,6 @@ pub fn with_new_children_if_necessary( } } -/// Rewrites an expression according to new schema; i.e. changes the columns it -/// refers to with the column at corresponding index in the new schema. Returns -/// an error if the given schema has fewer columns than the original schema. -/// Note that the resulting expression may not be valid if data types in the -/// new schema is incompatible with expression nodes. 
-pub fn with_new_schema( - expr: Arc, - schema: &SchemaRef, -) -> Result> { - Ok(expr - .transform_up(|expr| { - if let Some(col) = expr.as_any().downcast_ref::() { - let idx = col.index(); - let Some(field) = schema.fields().get(idx) else { - return plan_err!( - "New schema has fewer columns than original schema" - ); - }; - let new_col = Column::new(field.name(), idx); - Ok(Transformed::yes(Arc::new(new_col) as _)) - } else { - Ok(Transformed::no(expr)) - } - })? - .data) -} - pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { if any.is::>() { any.downcast_ref::>() diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 2b506b74216f4..9dc54d2eb2d0f 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -22,13 +22,12 @@ use std::hash::{Hash, Hasher}; use std::sync::Arc; use crate::physical_expr::PhysicalExpr; -use crate::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use datafusion_common::{exec_err, DFSchema, Result}; -use datafusion_expr::{ColumnarValue, Expr}; +use datafusion_common::Result; +use datafusion_expr_common::columnar_value::ColumnarValue; /// Represents Sort operation for a column in a RecordBatch #[derive(Clone, Debug)] @@ -272,29 +271,3 @@ pub type LexRequirement = Vec; ///`LexRequirementRef` is an alias for the type &`[PhysicalSortRequirement]`, which /// represents a reference to a lexicographical ordering requirement. pub type LexRequirementRef<'a> = &'a [PhysicalSortRequirement]; - -/// Converts each [`Expr::Sort`] into a corresponding [`PhysicalSortExpr`]. -/// Returns an error if the given logical expression is not a [`Expr::Sort`]. -pub fn limited_convert_logical_sort_exprs_to_physical_with_dfschema( - exprs: &[Expr], - dfschema: &DFSchema, -) -> Result> { - // Construct PhysicalSortExpr objects from Expr objects: - let mut sort_exprs = vec![]; - for expr in exprs { - let Expr::Sort(sort) = expr else { - return exec_err!("Expects to receive sort expression"); - }; - sort_exprs.push(PhysicalSortExpr::new( - limited_convert_logical_expr_to_physical_expr_with_dfschema( - sort.expr.as_ref(), - dfschema, - )?, - SortOptions { - descending: !sort.asc, - nulls_first: sort.nulls_first, - }, - )) - } - Ok(sort_exprs) -} diff --git a/datafusion/physical-expr-common/src/utils.rs b/datafusion/physical-expr-common/src/utils.rs index 0978a906a5dc6..d2c9bf1a24085 100644 --- a/datafusion/physical-expr-common/src/utils.rs +++ b/datafusion/physical-expr-common/src/utils.rs @@ -20,14 +20,9 @@ use std::sync::Arc; use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData}; use arrow::compute::{and_kleene, is_not_null, SlicesIterator}; -use datafusion_common::{exec_err, DFSchema, Result}; -use datafusion_expr::expr::Alias; -use datafusion_expr::sort_properties::ExprProperties; -use datafusion_expr::Expr; - -use crate::expressions::column::Column; -use crate::expressions::literal::Literal; -use crate::expressions::CastExpr; +use datafusion_common::Result; +use datafusion_expr_common::sort_properties::ExprProperties; + use crate::physical_expr::PhysicalExpr; use crate::sort_expr::PhysicalSortExpr; use crate::tree_node::ExprContext; @@ -108,35 +103,6 @@ pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec`. -/// If conversion is not supported yet, returns Error. 
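
Editorial note (not part of the patch): with `limited_convert_logical_sort_exprs_to_physical_with_dfschema` removed above, ordering requirements are expressed directly as `PhysicalSortExpr` values built from physical column expressions. A small sketch under that assumption; the schema and column name are placeholders:

```rust
use arrow::compute::SortOptions;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;

// Build `b DESC NULLS LAST` directly, without going through logical `Expr::Sort`.
fn order_by_b_desc(schema: &Schema) -> Result<PhysicalSortExpr> {
    Ok(PhysicalSortExpr::new(
        col("b", schema)?,
        SortOptions {
            descending: true,
            nulls_first: false,
        },
    ))
}

fn main() -> Result<()> {
    let schema = Schema::new(vec![Field::new("b", DataType::Int64, true)]);
    let _sort_expr = order_by_b_desc(&schema)?;
    Ok(())
}
```
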
-pub fn limited_convert_logical_expr_to_physical_expr_with_dfschema( - expr: &Expr, - dfschema: &DFSchema, -) -> Result> { - match expr { - Expr::Alias(Alias { expr, .. }) => Ok( - limited_convert_logical_expr_to_physical_expr_with_dfschema(expr, dfschema)?, - ), - Expr::Column(col) => { - let idx = dfschema.index_of_column(col)?; - Ok(Arc::new(Column::new(&col.name, idx))) - } - Expr::Cast(cast_expr) => Ok(Arc::new(CastExpr::new( - limited_convert_logical_expr_to_physical_expr_with_dfschema( - cast_expr.expr.as_ref(), - dfschema, - )?, - cast_expr.data_type.clone(), - None, - ))), - Expr::Literal(value) => Ok(Arc::new(Literal::new(value.clone()))), - _ => exec_err!( - "Unsupported expression: {expr} for conversion to Arc" - ), - } -} - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/datafusion/physical-expr-functions-aggregate/Cargo.toml b/datafusion/physical-expr-functions-aggregate/Cargo.toml new file mode 100644 index 0000000000000..6eed89614c532 --- /dev/null +++ b/datafusion/physical-expr-functions-aggregate/Cargo.toml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-physical-expr-functions-aggregate" +description = "Logical plan and expression representation for DataFusion query engine" +keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_physical_expr_functions_aggregate" +path = "src/lib.rs" + +[features] + +[dependencies] +ahash = { workspace = true } +arrow = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-expr-common = { workspace = true } +datafusion-functions-aggregate-common = { workspace = true } +datafusion-physical-expr-common = { workspace = true } +rand = { workspace = true } diff --git a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs new file mode 100644 index 0000000000000..8185f0fdd51f6 --- /dev/null +++ b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs @@ -0,0 +1,486 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion_common::{internal_err, not_impl_err, Result}; +use datafusion_expr::expr::create_function_physical_name; +use datafusion_expr::AggregateUDF; +use datafusion_expr::ReversedUDAF; +use datafusion_expr_common::accumulator::Accumulator; +use datafusion_expr_common::groups_accumulator::GroupsAccumulator; +use datafusion_expr_common::type_coercion::aggregates::check_arg_count; +use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; +use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; +use datafusion_functions_aggregate_common::aggregate::AggregateExpr; +use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; +use datafusion_functions_aggregate_common::utils::{self, down_cast_any_ref}; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::utils::reverse_order_bys; + +use std::fmt::Debug; +use std::{any::Any, sync::Arc}; + +/// Builder for physical [`AggregateExpr`] +/// +/// `AggregateExpr` contains the information necessary to call +/// an aggregate expression. +#[derive(Debug, Clone)] +pub struct AggregateExprBuilder { + fun: Arc, + /// Physical expressions of the aggregate function + args: Vec>, + alias: Option, + /// Arrow Schema for the aggregate function + schema: SchemaRef, + /// The physical order by expressions + ordering_req: LexOrdering, + /// Whether to ignore null values + ignore_nulls: bool, + /// Whether is distinct aggregate function + is_distinct: bool, + /// Whether the expression is reversed + is_reversed: bool, +} + +impl AggregateExprBuilder { + pub fn new(fun: Arc, args: Vec>) -> Self { + Self { + fun, + args, + alias: None, + schema: Arc::new(Schema::empty()), + ordering_req: vec![], + ignore_nulls: false, + is_distinct: false, + is_reversed: false, + } + } + + pub fn build(self) -> Result> { + let Self { + fun, + args, + alias, + schema, + ordering_req, + ignore_nulls, + is_distinct, + is_reversed, + } = self; + if args.is_empty() { + return internal_err!("args should not be empty"); + } + + let mut ordering_fields = vec![]; + + if !ordering_req.is_empty() { + let ordering_types = ordering_req + .iter() + .map(|e| e.expr.data_type(&schema)) + .collect::>>()?; + + ordering_fields = utils::ordering_fields(&ordering_req, &ordering_types); + } + + let input_exprs_types = args + .iter() + .map(|arg| arg.data_type(&schema)) + .collect::>>()?; + + check_arg_count( + fun.name(), + &input_exprs_types, + &fun.signature().type_signature, + )?; + + let data_type = fun.return_type(&input_exprs_types)?; + let name = match alias { + // TODO: Ideally, we should build the name from physical expressions + None => create_function_physical_name(fun.name(), is_distinct, &[], None)?, + Some(alias) => alias, + }; + + Ok(Arc::new(AggregateFunctionExpr { + fun: Arc::unwrap_or_clone(fun), + args, + data_type, + name, + schema: Arc::unwrap_or_clone(schema), + ordering_req, + ignore_nulls, + ordering_fields, + 
is_distinct, + input_types: input_exprs_types, + is_reversed, + })) + } + + pub fn alias(mut self, alias: impl Into) -> Self { + self.alias = Some(alias.into()); + self + } + + pub fn schema(mut self, schema: SchemaRef) -> Self { + self.schema = schema; + self + } + + pub fn order_by(mut self, order_by: LexOrdering) -> Self { + self.ordering_req = order_by; + self + } + + pub fn reversed(mut self) -> Self { + self.is_reversed = true; + self + } + + pub fn with_reversed(mut self, is_reversed: bool) -> Self { + self.is_reversed = is_reversed; + self + } + + pub fn distinct(mut self) -> Self { + self.is_distinct = true; + self + } + + pub fn with_distinct(mut self, is_distinct: bool) -> Self { + self.is_distinct = is_distinct; + self + } + + pub fn ignore_nulls(mut self) -> Self { + self.ignore_nulls = true; + self + } + + pub fn with_ignore_nulls(mut self, ignore_nulls: bool) -> Self { + self.ignore_nulls = ignore_nulls; + self + } +} + +/// Physical aggregate expression of a UDAF. +#[derive(Debug, Clone)] +pub struct AggregateFunctionExpr { + fun: AggregateUDF, + args: Vec>, + /// Output / return type of this aggregate + data_type: DataType, + name: String, + schema: Schema, + // The physical order by expressions + ordering_req: LexOrdering, + // Whether to ignore null values + ignore_nulls: bool, + // fields used for order sensitive aggregation functions + ordering_fields: Vec, + is_distinct: bool, + is_reversed: bool, + input_types: Vec, +} + +impl AggregateFunctionExpr { + /// Return the `AggregateUDF` used by this `AggregateFunctionExpr` + pub fn fun(&self) -> &AggregateUDF { + &self.fun + } + + /// Return if the aggregation is distinct + pub fn is_distinct(&self) -> bool { + self.is_distinct + } + + /// Return if the aggregation ignores nulls + pub fn ignore_nulls(&self) -> bool { + self.ignore_nulls + } + + /// Return if the aggregation is reversed + pub fn is_reversed(&self) -> bool { + self.is_reversed + } +} + +impl AggregateExpr for AggregateFunctionExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn expressions(&self) -> Vec> { + self.args.clone() + } + + fn state_fields(&self) -> Result> { + let args = StateFieldsArgs { + name: &self.name, + input_types: &self.input_types, + return_type: &self.data_type, + ordering_fields: &self.ordering_fields, + is_distinct: self.is_distinct, + }; + + self.fun.state_fields(args) + } + + fn field(&self) -> Result { + Ok(Field::new(&self.name, self.data_type.clone(), true)) + } + + fn create_accumulator(&self) -> Result> { + let acc_args = AccumulatorArgs { + return_type: &self.data_type, + schema: &self.schema, + ignore_nulls: self.ignore_nulls, + ordering_req: &self.ordering_req, + is_distinct: self.is_distinct, + name: &self.name, + is_reversed: self.is_reversed, + exprs: &self.args, + }; + + self.fun.accumulator(acc_args) + } + + fn create_sliding_accumulator(&self) -> Result> { + let args = AccumulatorArgs { + return_type: &self.data_type, + schema: &self.schema, + ignore_nulls: self.ignore_nulls, + ordering_req: &self.ordering_req, + is_distinct: self.is_distinct, + name: &self.name, + is_reversed: self.is_reversed, + exprs: &self.args, + }; + + let accumulator = self.fun.create_sliding_accumulator(args)?; + + // Accumulators that have window frame startings different + // than `UNBOUNDED PRECEDING`, such as `1 PRECEDING`, need to + // implement retract_batch method in order to run correctly + // currently in DataFusion. 
+ // + // If this `retract_batches` is not present, there is no way + // to calculate result correctly. For example, the query + // + // ```sql + // SELECT + // SUM(a) OVER(ORDER BY a ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS sum_a + // FROM + // t + // ``` + // + // 1. First sum value will be the sum of rows between `[0, 1)`, + // + // 2. Second sum value will be the sum of rows between `[0, 2)` + // + // 3. Third sum value will be the sum of rows between `[1, 3)`, etc. + // + // Since the accumulator keeps the running sum: + // + // 1. First sum we add to the state sum value between `[0, 1)` + // + // 2. Second sum we add to the state sum value between `[1, 2)` + // (`[0, 1)` is already in the state sum, hence running sum will + // cover `[0, 2)` range) + // + // 3. Third sum we add to the state sum value between `[2, 3)` + // (`[0, 2)` is already in the state sum). Also we need to + // retract values between `[0, 1)` by this way we can obtain sum + // between [1, 3) which is indeed the appropriate range. + // + // When we use `UNBOUNDED PRECEDING` in the query starting + // index will always be 0 for the desired range, and hence the + // `retract_batch` method will not be called. In this case + // having retract_batch is not a requirement. + // + // This approach is a a bit different than window function + // approach. In window function (when they use a window frame) + // they get all the desired range during evaluation. + if !accumulator.supports_retract_batch() { + return not_impl_err!( + "Aggregate can not be used as a sliding accumulator because \ + `retract_batch` is not implemented: {}", + self.name + ); + } + Ok(accumulator) + } + + fn name(&self) -> &str { + &self.name + } + + fn groups_accumulator_supported(&self) -> bool { + let args = AccumulatorArgs { + return_type: &self.data_type, + schema: &self.schema, + ignore_nulls: self.ignore_nulls, + ordering_req: &self.ordering_req, + is_distinct: self.is_distinct, + name: &self.name, + is_reversed: self.is_reversed, + exprs: &self.args, + }; + self.fun.groups_accumulator_supported(args) + } + + fn create_groups_accumulator(&self) -> Result> { + let args = AccumulatorArgs { + return_type: &self.data_type, + schema: &self.schema, + ignore_nulls: self.ignore_nulls, + ordering_req: &self.ordering_req, + is_distinct: self.is_distinct, + name: &self.name, + is_reversed: self.is_reversed, + exprs: &self.args, + }; + self.fun.create_groups_accumulator(args) + } + + fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + if self.ordering_req.is_empty() { + return None; + } + + if !self.order_sensitivity().is_insensitive() { + return Some(&self.ordering_req); + } + + None + } + + fn order_sensitivity(&self) -> AggregateOrderSensitivity { + if !self.ordering_req.is_empty() { + // If there is requirement, use the sensitivity of the implementation + self.fun.order_sensitivity() + } else { + // If no requirement, aggregator is order insensitive + AggregateOrderSensitivity::Insensitive + } + } + + fn with_beneficial_ordering( + self: Arc, + beneficial_ordering: bool, + ) -> Result>> { + let Some(updated_fn) = self + .fun + .clone() + .with_beneficial_ordering(beneficial_ordering)? 
+ else { + return Ok(None); + }; + + AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) + .order_by(self.ordering_req.to_vec()) + .schema(Arc::new(self.schema.clone())) + .alias(self.name().to_string()) + .with_ignore_nulls(self.ignore_nulls) + .with_distinct(self.is_distinct) + .with_reversed(self.is_reversed) + .build() + .map(Some) + } + + fn reverse_expr(&self) -> Option> { + match self.fun.reverse_udf() { + ReversedUDAF::NotSupported => None, + ReversedUDAF::Identical => Some(Arc::new(self.clone())), + ReversedUDAF::Reversed(reverse_udf) => { + let reverse_ordering_req = reverse_order_bys(&self.ordering_req); + let mut name = self.name().to_string(); + // If the function is changed, we need to reverse order_by clause as well + // i.e. First(a order by b asc null first) -> Last(a order by b desc null last) + if self.fun().name() == reverse_udf.name() { + } else { + replace_order_by_clause(&mut name); + } + replace_fn_name_clause(&mut name, self.fun.name(), reverse_udf.name()); + + AggregateExprBuilder::new(reverse_udf, self.args.to_vec()) + .order_by(reverse_ordering_req.to_vec()) + .schema(Arc::new(self.schema.clone())) + .alias(name) + .with_ignore_nulls(self.ignore_nulls) + .with_distinct(self.is_distinct) + .with_reversed(!self.is_reversed) + .build() + .ok() + } + } + } + + fn get_minmax_desc(&self) -> Option<(Field, bool)> { + self.fun + .is_descending() + .and_then(|flag| self.field().ok().map(|f| (f, flag))) + } +} + +impl PartialEq for AggregateFunctionExpr { + fn eq(&self, other: &dyn Any) -> bool { + down_cast_any_ref(other) + .downcast_ref::() + .map(|x| { + self.name == x.name + && self.data_type == x.data_type + && self.fun == x.fun + && self.args.len() == x.args.len() + && self + .args + .iter() + .zip(x.args.iter()) + .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) + }) + .unwrap_or(false) + } +} + +fn replace_order_by_clause(order_by: &mut String) { + let suffixes = [ + (" DESC NULLS FIRST]", " ASC NULLS LAST]"), + (" ASC NULLS FIRST]", " DESC NULLS LAST]"), + (" DESC NULLS LAST]", " ASC NULLS FIRST]"), + (" ASC NULLS LAST]", " DESC NULLS FIRST]"), + ]; + + if let Some(start) = order_by.find("ORDER BY [") { + if let Some(end) = order_by[start..].find(']') { + let order_by_start = start + 9; + let order_by_end = start + end; + + let column_order = &order_by[order_by_start..=order_by_end]; + for (suffix, replacement) in suffixes { + if column_order.ends_with(suffix) { + let new_order = column_order.replace(suffix, replacement); + order_by.replace_range(order_by_start..=order_by_end, &new_order); + break; + } + } + } + } +} + +fn replace_fn_name_clause(aggr_name: &mut String, fn_name_old: &str, fn_name_new: &str) { + *aggr_name = aggr_name.replace(fn_name_old, fn_name_new); +} diff --git a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr-functions-aggregate/src/lib.rs similarity index 87% rename from datafusion/physical-expr-common/src/aggregate/groups_accumulator/mod.rs rename to datafusion/physical-expr-functions-aggregate/src/lib.rs index 5b0182c5db8a7..2ff7ff5777ec2 100644 --- a/datafusion/physical-expr-common/src/aggregate/groups_accumulator/mod.rs +++ b/datafusion/physical-expr-functions-aggregate/src/lib.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -//! Utilities for implementing GroupsAccumulator +//! Technically, all aggregate functions that depend on `expr` crate should be included here. 
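
Editorial note (not part of the patch): the private `replace_order_by_clause` and `replace_fn_name_clause` helpers carried over into the new `aggregate.rs` above rewrite an aggregate's display name when it is reversed. A sketch of their combined effect, written as a test that would sit alongside those helpers; the input string is a made-up physical name, so the exact formatting produced by the planner may differ:

```rust
#[cfg(test)]
mod name_rewrite_sketch {
    use super::*;

    #[test]
    fn reversing_rewrites_the_display_name() {
        // e.g. FIRST_VALUE(a ORDER BY b ASC) reversed into LAST_VALUE(a ORDER BY b DESC)
        let mut name = "first_value(a) ORDER BY [b ASC NULLS LAST]".to_string();
        replace_order_by_clause(&mut name);
        replace_fn_name_clause(&mut name, "first_value", "last_value");
        assert_eq!(name, "last_value(a) ORDER BY [b DESC NULLS FIRST]");
    }
}
```
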
-pub mod accumulate; -pub mod bool_op; -pub mod prim_op; +pub mod aggregate; diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 8436b5279bd76..c53f7a6c47715 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -56,6 +56,8 @@ chrono = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } +datafusion-expr-common = { workspace = true } +datafusion-functions-aggregate-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } half = { workspace = true } hashbrown = { workspace = true } diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 862edd9c1fac3..8a34f34a82db3 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -22,9 +22,7 @@ use arrow_schema::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{BinaryExpr, CaseExpr}; -use datafusion_physical_expr_common::expressions::column::Column; -use datafusion_physical_expr_common::expressions::Literal; +use datafusion_physical_expr::expressions::{BinaryExpr, CaseExpr, Column, Literal}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::sync::Arc; diff --git a/datafusion/physical-expr/benches/is_null.rs b/datafusion/physical-expr/benches/is_null.rs index 3dad8e9b456a0..7d26557afb1b3 100644 --- a/datafusion/physical-expr/benches/is_null.rs +++ b/datafusion/physical-expr/benches/is_null.rs @@ -20,8 +20,7 @@ use arrow::record_batch::RecordBatch; use arrow_array::builder::Int32Builder; use arrow_schema::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_physical_expr::expressions::{IsNotNullExpr, IsNullExpr}; -use datafusion_physical_expr_common::expressions::column::Column; +use datafusion_physical_expr::expressions::{Column, IsNotNullExpr, IsNullExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::sync::Arc; diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index ffa58e3853225..0296b7a247d6e 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -47,7 +47,7 @@ use datafusion_common::JoinType; /// /// ```rust /// # use datafusion_physical_expr::ConstExpr; -/// # use datafusion_physical_expr_common::expressions::lit; +/// # use datafusion_physical_expr::expressions::lit; /// let col = lit(5); /// // Create a constant expression from a physical expression ref /// let const_expr = ConstExpr::from(&col); diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index a6e9fba281676..a5d54ee56cffe 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -24,7 +24,7 @@ use crate::equivalence::{ collapse_lex_req, EquivalenceClass, EquivalenceGroup, OrderingEquivalenceClass, ProjectionMapping, }; -use crate::expressions::Literal; +use crate::expressions::{with_new_schema, CastExpr, Column, Literal}; use crate::{ physical_exprs_contains, ConstExpr, LexOrdering, LexOrderingRef, LexRequirement, LexRequirementRef, 
PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, @@ -36,9 +36,6 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{plan_err, JoinSide, JoinType, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_physical_expr_common::expressions::column::Column; -use datafusion_physical_expr_common::expressions::CastExpr; -use datafusion_physical_expr_common::physical_expr::with_new_schema; use datafusion_physical_expr_common::utils::ExprPropertiesNode; use indexmap::{IndexMap, IndexSet}; diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index c34dcdfb75988..347a5d82dbecd 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -681,11 +681,9 @@ pub fn binary( #[cfg(test)] mod tests { use super::*; - use crate::expressions::{col, lit, try_cast, Literal}; - + use crate::expressions::{col, lit, try_cast, Column, Literal}; use datafusion_common::plan_datafusion_err; use datafusion_expr::type_coercion::binary::get_input_types; - use datafusion_physical_expr_common::expressions::column::Column; /// Performs a binary operation, applying any type coercion necessary fn binary_op( diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index b428d562bd1b7..583a4ef32542d 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -32,8 +32,7 @@ use datafusion_common::cast::as_boolean_array; use datafusion_common::{exec_err, internal_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::ColumnarValue; -use datafusion_physical_expr_common::expressions::column::Column; -use datafusion_physical_expr_common::expressions::Literal; +use super::{Column, Literal}; use itertools::Itertools; type WhenThen = (Arc, Arc); @@ -548,8 +547,8 @@ pub fn case( #[cfg(test)] mod tests { use super::*; - use crate::expressions::{binary, cast, col, lit, BinaryExpr}; + use crate::expressions::{binary, cast, col, lit, BinaryExpr}; use arrow::buffer::Buffer; use arrow::datatypes::DataType::Float64; use arrow::datatypes::*; @@ -558,7 +557,6 @@ mod tests { use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_expr::type_coercion::binary::comparison_coercion; use datafusion_expr::Operator; - use datafusion_physical_expr_common::expressions::Literal; #[test] fn case_with_expr() -> Result<()> { diff --git a/datafusion/physical-expr-common/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs similarity index 98% rename from datafusion/physical-expr-common/src/expressions/cast.rs rename to datafusion/physical-expr/src/expressions/cast.rs index dd6131ad65c3b..5621473c4fdb1 100644 --- a/datafusion/physical-expr-common/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -27,9 +27,9 @@ use arrow::datatypes::{DataType, DataType::*, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; use datafusion_common::{not_impl_err, Result}; -use datafusion_expr::interval_arithmetic::Interval; -use datafusion_expr::sort_properties::ExprProperties; -use datafusion_expr::ColumnarValue; +use datafusion_expr_common::columnar_value::ColumnarValue; +use datafusion_expr_common::interval_arithmetic::Interval; +use 
datafusion_expr_common::sort_properties::ExprProperties; const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions { safe: false, @@ -136,7 +136,7 @@ impl PhysicalExpr for CastExpr { children: Vec>, ) -> Result> { Ok(Arc::new(CastExpr::new( - children[0].clone(), + Arc::clone(&children[0]), self.cast_type.clone(), Some(self.cast_options.clone()), ))) @@ -211,7 +211,7 @@ pub fn cast_with_options( ) -> Result> { let expr_type = expr.data_type(input_schema)?; if expr_type == cast_type { - Ok(expr.clone()) + Ok(Arc::clone(&expr)) } else if can_cast_types(&expr_type, &cast_type) { Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) } else { diff --git a/datafusion/physical-expr-common/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs similarity index 82% rename from datafusion/physical-expr-common/src/expressions/column.rs rename to datafusion/physical-expr/src/expressions/column.rs index 5397599ea2dcc..79d15fdb02e81 100644 --- a/datafusion/physical-expr-common/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -25,7 +25,9 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use datafusion_common::{internal_err, Result}; +use arrow_schema::SchemaRef; +use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_common::{internal_err, plan_err, Result}; use datafusion_expr::ColumnarValue; use crate::physical_expr::{down_cast_any_ref, PhysicalExpr}; @@ -89,7 +91,7 @@ impl PhysicalExpr for Column { /// Evaluate the expression fn evaluate(&self, batch: &RecordBatch) -> Result { self.bounds_check(batch.schema().as_ref())?; - Ok(ColumnarValue::Array(batch.column(self.index).clone())) + Ok(ColumnarValue::Array(Arc::clone(batch.column(self.index)))) } fn children(&self) -> Vec<&Arc> { @@ -136,6 +138,33 @@ pub fn col(name: &str, schema: &Schema) -> Result> { Ok(Arc::new(Column::new_with_schema(name, schema)?)) } +/// Rewrites an expression according to new schema; i.e. changes the columns it +/// refers to with the column at corresponding index in the new schema. Returns +/// an error if the given schema has fewer columns than the original schema. +/// Note that the resulting expression may not be valid if data types in the +/// new schema is incompatible with expression nodes. +pub fn with_new_schema( + expr: Arc, + schema: &SchemaRef, +) -> Result> { + Ok(expr + .transform_up(|expr| { + if let Some(col) = expr.as_any().downcast_ref::() { + let idx = col.index(); + let Some(field) = schema.fields().get(idx) else { + return plan_err!( + "New schema has fewer columns than original schema" + ); + }; + let new_col = Column::new(field.name(), idx); + Ok(Transformed::yes(Arc::new(new_col) as _)) + } else { + Ok(Transformed::no(expr)) + } + })? 
+ .data) +} + #[cfg(test)] mod test { use super::Column; diff --git a/datafusion/physical-expr-common/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs similarity index 95% rename from datafusion/physical-expr-common/src/expressions/literal.rs rename to datafusion/physical-expr/src/expressions/literal.rs index b3cff1ef69baf..ed24e9028153e 100644 --- a/datafusion/physical-expr-common/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -28,9 +28,10 @@ use arrow::{ record_batch::RecordBatch, }; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::interval_arithmetic::Interval; -use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{ColumnarValue, Expr}; +use datafusion_expr::Expr; +use datafusion_expr_common::columnar_value::ColumnarValue; +use datafusion_expr_common::interval_arithmetic::Interval; +use datafusion_expr_common::sort_properties::{ExprProperties, SortProperties}; /// Represents a literal value #[derive(Debug, PartialEq, Eq, Hash)] diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index cbb697b5f3041..9e65889d87583 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -20,10 +20,13 @@ #[macro_use] mod binary; mod case; +mod cast; +mod column; mod in_list; mod is_not_null; mod is_null; mod like; +mod literal; mod negative; mod no_op; mod not; @@ -42,14 +45,14 @@ pub use crate::PhysicalSortExpr; pub use binary::{binary, BinaryExpr}; pub use case::{case, CaseExpr}; +pub use cast::{cast, CastExpr}; +pub use column::{col, with_new_schema, Column}; pub use datafusion_expr::utils::format_state_name; -pub use datafusion_physical_expr_common::expressions::column::{col, Column}; -pub use datafusion_physical_expr_common::expressions::literal::{lit, Literal}; -pub use datafusion_physical_expr_common::expressions::{cast, CastExpr}; pub use in_list::{in_list, InListExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; pub use is_null::{is_null, IsNullExpr}; pub use like::{like, LikeExpr}; +pub use literal::{lit, Literal}; pub use negative::{negative, NegativeExpr}; pub use no_op::NoOp; pub use not::{not, NotExpr}; diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index 2e78119eba468..c4255172d680c 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -14,10 +14,32 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
+ // Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] -pub mod aggregate; +// Backward compatibility +pub mod aggregate { + pub(crate) mod groups_accumulator { + #[allow(unused_imports)] + pub(crate) mod accumulate { + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; + } + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::{ + accumulate::NullState, GroupsAccumulatorAdapter, + }; + } + pub(crate) mod stats { + pub use datafusion_functions_aggregate_common::stats::StatsType; + } + pub mod utils { + pub use datafusion_functions_aggregate_common::utils::{ + adjust_output_array, down_cast_any_ref, get_accum_scalar_values_as_arrays, + get_sort_options, ordering_fields, DecimalAverager, Hashable, + }; + } + pub use datafusion_functions_aggregate_common::aggregate::AggregateExpr; +} pub mod analysis; pub mod binary_map { pub use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; @@ -45,7 +67,7 @@ pub mod execution_props { pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, NullState}; pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; -pub use datafusion_physical_expr_common::aggregate::{ +pub use datafusion_functions_aggregate_common::aggregate::{ AggregateExpr, AggregatePhysicalExpressions, }; pub use equivalence::{calculate_union, ConstExpr, EquivalenceProperties}; diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index c60a772b9ce26..c718e6b054ef3 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +pub(crate) use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use itertools::izip; pub use datafusion_physical_expr_common::physical_expr::down_cast_any_ref; diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index d3f66bdea93d5..78da4dc9c53f1 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -52,8 +52,10 @@ datafusion-common-runtime = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions-aggregate = { workspace = true } +datafusion-functions-aggregate-common = { workspace = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-physical-expr-common = { workspace = true } +datafusion-physical-expr-functions-aggregate = { workspace = true } futures = { workspace = true } half = { workspace = true } hashbrown = { workspace = true } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index d72da9b300499..4d39eff42b5f4 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1203,26 +1203,23 @@ mod tests { use arrow::datatypes::DataType; use arrow_array::{Float32Array, Int32Array}; use datafusion_common::{ - assert_batches_eq, assert_batches_sorted_eq, internal_err, DFSchema, DFSchemaRef, - DataFusionError, ScalarValue, + assert_batches_eq, assert_batches_sorted_eq, internal_err, DataFusionError, + ScalarValue, }; use datafusion_execution::config::SessionConfig; use datafusion_execution::memory_pool::FairSpillPool; use 
datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; - use datafusion_expr::expr::Sort; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; - use datafusion_functions_aggregate::first_last::{FirstValue, LastValue}; + use datafusion_functions_aggregate::first_last::{first_value_udaf, last_value_udaf}; use datafusion_functions_aggregate::median::median_udaf; use datafusion_physical_expr::expressions::lit; use datafusion_physical_expr::PhysicalSortExpr; use crate::common::collect; - use datafusion_physical_expr_common::aggregate::{ - create_aggregate_expr_with_dfschema, AggregateExprBuilder, - }; - use datafusion_physical_expr_common::expressions::Literal; + use datafusion_physical_expr::expressions::Literal; + use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use futures::{FutureExt, Stream}; // Generate a schema which consists of 5 columns (a, b, c, d, e) @@ -1270,22 +1267,19 @@ mod tests { } /// Generates some mock data for aggregate tests. - fn some_data_v2() -> (Arc, DFSchemaRef, Vec) { + fn some_data_v2() -> (Arc, Vec) { // Define a schema: let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::UInt32, false), Field::new("b", DataType::Float64, false), ])); - let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); - // Generate data so that first and last value results are at 2nd and // 3rd partitions. With this construction, we guarantee we don't receive // the expected result by accident, but merging actually works properly; // i.e. it doesn't depend on the data insertion order. ( Arc::clone(&schema), - Arc::new(df_schema), vec![ RecordBatch::try_new( Arc::clone(&schema), @@ -1363,7 +1357,6 @@ mod tests { let aggregates = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1i8)]) .schema(Arc::clone(&input_schema)) .alias("COUNT(1)") - .logical_exprs(vec![datafusion_expr::lit(1i8)]) .build()?]; let task_ctx = if spill { @@ -1980,65 +1973,36 @@ mod tests { // FIRST_VALUE(b ORDER BY b ) fn test_first_value_agg_expr( schema: &Schema, - dfschema: &DFSchema, sort_options: SortOptions, ) -> Result> { - let sort_exprs = vec![datafusion_expr::Expr::Sort(Sort { - expr: Box::new(datafusion_expr::col("b")), - asc: !sort_options.descending, - nulls_first: sort_options.nulls_first, - })]; - let ordering_req = vec![PhysicalSortExpr { + let ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, }]; - let args = vec![col("b", schema)?]; - let logical_args = vec![datafusion_expr::col("b")]; - let func = datafusion_expr::AggregateUDF::new_from_impl(FirstValue::new()); - datafusion_physical_expr_common::aggregate::create_aggregate_expr_with_dfschema( - &func, - &args, - &logical_args, - &sort_exprs, - &ordering_req, - dfschema, - None, - false, - false, - false, - ) + let args = [col("b", schema)?]; + + AggregateExprBuilder::new(first_value_udaf(), args.to_vec()) + .order_by(ordering_req.to_vec()) + .schema(Arc::new(schema.clone())) + .alias(String::from("first_value(b) ORDER BY [b ASC NULLS LAST]")) + .build() } // LAST_VALUE(b ORDER BY b ) fn test_last_value_agg_expr( schema: &Schema, - dfschema: &DFSchema, sort_options: SortOptions, ) -> Result> { - let sort_exprs = vec![datafusion_expr::Expr::Sort(Sort { - expr: Box::new(datafusion_expr::col("b")), - asc: !sort_options.descending, - nulls_first: sort_options.nulls_first, - })]; - let ordering_req = vec![PhysicalSortExpr { + let 
ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, }]; - let args = vec![col("b", schema)?]; - let logical_args = vec![datafusion_expr::col("b")]; - let func = datafusion_expr::AggregateUDF::new_from_impl(LastValue::new()); - create_aggregate_expr_with_dfschema( - &func, - &args, - &logical_args, - &sort_exprs, - &ordering_req, - dfschema, - None, - false, - false, - false, - ) + let args = [col("b", schema)?]; + AggregateExprBuilder::new(last_value_udaf(), args.to_vec()) + .order_by(ordering_req.to_vec()) + .schema(Arc::new(schema.clone())) + .alias(String::from("last_value(b) ORDER BY [b ASC NULLS LAST]")) + .build() } // This function either constructs the physical plan below, @@ -2070,7 +2034,7 @@ mod tests { Arc::new(TaskContext::default()) }; - let (schema, df_schema, data) = some_data_v2(); + let (schema, data) = some_data_v2(); let partition1 = data[0].clone(); let partition2 = data[1].clone(); let partition3 = data[2].clone(); @@ -2084,13 +2048,9 @@ mod tests { nulls_first: false, }; let aggregates: Vec> = if is_first_acc { - vec![test_first_value_agg_expr( - &schema, - &df_schema, - sort_options, - )?] + vec![test_first_value_agg_expr(&schema, sort_options)?] } else { - vec![test_last_value_agg_expr(&schema, &df_schema, sort_options)?] + vec![test_last_value_agg_expr(&schema, sort_options)?] }; let memory_exec = Arc::new(MemoryExec::try_new( @@ -2157,7 +2117,6 @@ mod tests { #[tokio::test] async fn test_get_finest_requirements() -> Result<()> { let test_schema = create_test_schema()?; - let test_df_schema = DFSchema::try_from(Arc::clone(&test_schema)).unwrap(); // Assume column a and b are aliases // Assume also that a ASC and c DESC describe the same global ordering for the table. (Since they are ordering equivalent). 
@@ -2204,46 +2163,7 @@ mod tests { }, ]), ]; - let col_expr_a = Box::new(datafusion_expr::col("a")); - let col_expr_b = Box::new(datafusion_expr::col("b")); - let col_expr_c = Box::new(datafusion_expr::col("c")); - let sort_exprs = vec![ - None, - Some(vec![datafusion_expr::Expr::Sort(Sort::new( - col_expr_a.clone(), - options1.descending, - options1.nulls_first, - ))]), - Some(vec![ - datafusion_expr::Expr::Sort(Sort::new( - col_expr_a.clone(), - options1.descending, - options1.nulls_first, - )), - datafusion_expr::Expr::Sort(Sort::new( - col_expr_b.clone(), - options1.descending, - options1.nulls_first, - )), - datafusion_expr::Expr::Sort(Sort::new( - col_expr_c, - options1.descending, - options1.nulls_first, - )), - ]), - Some(vec![ - datafusion_expr::Expr::Sort(Sort::new( - col_expr_a, - options1.descending, - options1.nulls_first, - )), - datafusion_expr::Expr::Sort(Sort::new( - col_expr_b, - options1.descending, - options1.nulls_first, - )), - ]), - ]; + let common_requirement = vec![ PhysicalSortExpr { expr: Arc::clone(col_a), @@ -2256,23 +2176,13 @@ mod tests { ]; let mut aggr_exprs = order_by_exprs .into_iter() - .zip(sort_exprs.into_iter()) - .map(|(order_by_expr, sort_exprs)| { + .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); - let sort_exprs = sort_exprs.unwrap_or_default(); - create_aggregate_expr_with_dfschema( - &array_agg_udaf(), - &[Arc::clone(col_a)], - &[], - &sort_exprs, - &ordering_req, - &test_df_schema, - None, - false, - false, - false, - ) - .unwrap() + AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) + .order_by(ordering_req.to_vec()) + .schema(Arc::clone(&test_schema)) + .build() + .unwrap() }) .collect::>(); let group_by = PhysicalGroupBy::new_single(vec![]); @@ -2293,7 +2203,6 @@ mod tests { Field::new("a", DataType::Float32, true), Field::new("b", DataType::Float32, true), ])); - let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); let col_a = col("a", &schema)?; let option_desc = SortOptions { @@ -2303,8 +2212,8 @@ mod tests { let groups = PhysicalGroupBy::new_single(vec![(col_a, "a".to_string())]); let aggregates: Vec> = vec![ - test_first_value_agg_expr(&schema, &df_schema, option_desc)?, - test_last_value_agg_expr(&schema, &df_schema, option_desc)?, + test_first_value_agg_expr(&schema, option_desc)?, + test_last_value_agg_expr(&schema, option_desc)?, ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let aggregate_exec = Arc::new(AggregateExec::try_new( @@ -2414,24 +2323,17 @@ mod tests { Field::new("key", DataType::Int32, true), Field::new("val", DataType::Int32, true), ])); - let df_schema = DFSchema::try_from(Arc::clone(&schema))?; let group_by = PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); - let aggr_expr: Vec> = - vec![create_aggregate_expr_with_dfschema( - &count_udaf(), - &[col("val", &schema)?], - &[datafusion_expr::col("val")], - &[], - &[], - &df_schema, - Some("COUNT(val)".to_string()), - false, - false, - false, - )?]; + let aggr_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) + .schema(Arc::clone(&schema)) + .alias(String::from("COUNT(val)")) + .build()?, + ]; let input_data = vec![ RecordBatch::try_new( @@ -2502,24 +2404,17 @@ mod tests { Field::new("key", DataType::Int32, true), Field::new("val", DataType::Int32, true), ])); - let df_schema = DFSchema::try_from(Arc::clone(&schema))?; let group_by = PhysicalGroupBy::new_single(vec![(col("key", &schema)?, "key".to_string())]); - let 
aggr_expr: Vec> = - vec![create_aggregate_expr_with_dfschema( - &count_udaf(), - &[col("val", &schema)?], - &[datafusion_expr::col("val")], - &[], - &[], - &df_schema, - Some("COUNT(val)".to_string()), - false, - false, - false, - )?]; + let aggr_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("val", &schema)?]) + .schema(Arc::clone(&schema)) + .alias(String::from("COUNT(val)")) + .build()?, + ]; let input_data = vec![ RecordBatch::try_new( diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index eeecc017c2afa..59c5da6b6fb20 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -82,9 +82,7 @@ pub mod windows; pub mod work_table; pub mod udaf { - pub use datafusion_physical_expr_common::aggregate::{ - create_aggregate_expr, create_aggregate_expr_with_dfschema, AggregateFunctionExpr, - }; + pub use datafusion_physical_expr_functions_aggregate::aggregate::AggregateFunctionExpr; } #[cfg(test)] diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 9321fdb2cadf8..9ef29c833dccb 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -586,8 +586,8 @@ mod tests { use arrow_schema::{DataType, SortOptions}; use datafusion_common::ScalarValue; + use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; - use datafusion_physical_expr_common::expressions::column::col; // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) fn create_test_schema() -> Result { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index b41f3ad71bb8f..2e6ad4e1a14fc 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -31,10 +31,9 @@ use crate::{ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::{col, Expr, SortExpr}; use datafusion_expr::{ - BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, - WindowUDF, + BuiltInWindowFunction, Expr, PartitionEvaluator, WindowFrame, + WindowFunctionDefinition, WindowUDF, }; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ @@ -43,7 +42,7 @@ use datafusion_physical_expr::{ AggregateExpr, ConstExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, }; -use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; +use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use itertools::Itertools; mod bounded_window_agg_exec; @@ -112,25 +111,10 @@ pub fn create_window_expr( )) } WindowFunctionDefinition::AggregateUDF(fun) => { - // Convert `Vec` into `Vec` - let sort_exprs = order_by - .iter() - .map(|PhysicalSortExpr { expr, options }| { - let field_name = expr.to_string(); - let field_name = field_name.split('@').next().unwrap_or(&field_name); - Expr::Sort(SortExpr { - expr: Box::new(col(field_name)), - asc: !options.descending, - nulls_first: options.nulls_first, - }) - }) - .collect::>(); - let aggregate = AggregateExprBuilder::new(Arc::clone(fun), args.to_vec()) .schema(Arc::new(input_schema.clone())) .alias(name) .order_by(order_by.to_vec()) - .sort_exprs(sort_exprs) .with_ignore_nulls(ignore_nulls) .build()?; window_expr_from_aggregate_expr( diff --git a/datafusion/proto/src/physical_plan/mod.rs 
b/datafusion/proto/src/physical_plan/mod.rs index 59db791c7595a..b5d28f40a68f8 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -18,7 +18,7 @@ use std::fmt::Debug; use std::sync::Arc; -use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; +use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use prost::bytes::BufMut; use prost::Message; diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 712182791b0bc..1a9c6d40ebe6c 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -24,7 +24,7 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; -use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; +use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use datafusion_functions_aggregate::min_max::max_udaf; use prost::Message; From 1e93f7fa2cb3b7bda71743e7ebe24999ff4d40a4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 9 Aug 2024 16:15:59 -0400 Subject: [PATCH 260/357] Add `generate_series` tests for arrays (#11921) * Add generate_series tests for arrays * Update datafusion/sqllogictest/test_files/array.slt --- datafusion/sqllogictest/test_files/array.slt | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 2b8b9429cce2e..9e34db8f8dc24 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5890,6 +5890,46 @@ select generate_series(NULL) ---- NULL +# Test generate_series with a table of values +statement ok +CREATE TABLE date_table( + start DATE, + stop DATE, + step INTERVAL +) AS VALUES + (DATE '1992-01-01', DATE '1993-01-02', INTERVAL '1' MONTH), + (DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + (DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR); + +query ? +select generate_series(start, stop, step) from date_table; +---- +[1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-05-01, 1992-06-01, 1992-07-01, 1992-08-01, 1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01] +[1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[1993-02-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + + +# https://github.com/apache/datafusion/issues/11922 +query error +select generate_series(start, '1993-03-01', INTERVAL '1 year') from date_table; +---- +DataFusion error: Internal error: could not cast value to arrow_array::array::primitive_array::PrimitiveArray. 
+This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + ## array_except From 12aa82c7454e9d5fdbc637a4dbeb7b7a4fc459e8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 9 Aug 2024 22:12:30 -0400 Subject: [PATCH 261/357] Minor: use `lit(true)` and `lit(false)` more (#11904) --- datafusion/optimizer/src/decorrelate.rs | 6 ++-- datafusion/optimizer/src/eliminate_filter.rs | 8 ++--- datafusion/optimizer/src/eliminate_join.rs | 8 ++--- .../optimizer/src/propagate_empty_relation.rs | 34 +++++++++---------- .../simplify_expressions/expr_simplifier.rs | 8 ++--- datafusion/physical-expr/src/planner.rs | 22 +++++------- 6 files changed, 40 insertions(+), 46 deletions(-) diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 16b4e43abcd5a..4d0770ccbbfb8 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -31,7 +31,7 @@ use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::Alias; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction}; -use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_expr::{expr, lit, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; use datafusion_physical_expr::execution_props::ExecutionProps; /// This struct rewrite the sub query plan by pull up the correlated @@ -282,9 +282,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { )?; if !expr_result_map_for_count_bug.is_empty() { // has count bug - let un_matched_row = - Expr::Literal(ScalarValue::Boolean(Some(true))) - .alias(UN_MATCHED_ROW_INDICATOR); + let un_matched_row = lit(true).alias(UN_MATCHED_ROW_INDICATOR); // add the unmatched rows indicator to the Aggregation's group expressions missing_exprs.push(un_matched_row); } diff --git a/datafusion/optimizer/src/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs index 2d8d77b89ddc8..84bb8e782142e 100644 --- a/datafusion/optimizer/src/eliminate_filter.rs +++ b/datafusion/optimizer/src/eliminate_filter.rs @@ -97,7 +97,7 @@ mod tests { #[test] fn filter_false() -> Result<()> { - let filter_expr = Expr::Literal(ScalarValue::Boolean(Some(false))); + let filter_expr = lit(false); let table_scan = test_table_scan().unwrap(); let plan = LogicalPlanBuilder::from(table_scan) @@ -127,7 +127,7 @@ mod tests { #[test] fn filter_false_nested() -> Result<()> { - let filter_expr = Expr::Literal(ScalarValue::Boolean(Some(false))); + let filter_expr = lit(false); let table_scan = test_table_scan()?; let plan1 = LogicalPlanBuilder::from(table_scan.clone()) @@ -149,7 +149,7 @@ mod tests { #[test] fn filter_true() -> Result<()> { - let filter_expr = Expr::Literal(ScalarValue::Boolean(Some(true))); + let filter_expr = lit(true); let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(table_scan) @@ -164,7 +164,7 @@ mod tests { #[test] fn filter_true_nested() -> Result<()> { - let filter_expr = Expr::Literal(ScalarValue::Boolean(Some(true))); + let filter_expr = lit(true); let table_scan = test_table_scan()?; let plan1 = LogicalPlanBuilder::from(table_scan.clone()) diff --git a/datafusion/optimizer/src/eliminate_join.rs b/datafusion/optimizer/src/eliminate_join.rs index c5115c87a0ed5..b15d981d11806 100644 --- a/datafusion/optimizer/src/eliminate_join.rs +++ b/datafusion/optimizer/src/eliminate_join.rs @@ -83,9 +83,9 @@ impl 
OptimizerRule for EliminateJoin { mod tests { use crate::eliminate_join::EliminateJoin; use crate::test::*; - use datafusion_common::{Result, ScalarValue}; + use datafusion_common::Result; use datafusion_expr::JoinType::Inner; - use datafusion_expr::{logical_plan::builder::LogicalPlanBuilder, Expr, LogicalPlan}; + use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder, LogicalPlan}; use std::sync::Arc; fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { @@ -98,7 +98,7 @@ mod tests { .join_on( LogicalPlanBuilder::empty(false).build()?, Inner, - Some(Expr::Literal(ScalarValue::Boolean(Some(false)))), + Some(lit(false)), )? .build()?; @@ -112,7 +112,7 @@ mod tests { .join_on( LogicalPlanBuilder::empty(false).build()?, Inner, - Some(Expr::Literal(ScalarValue::Boolean(Some(true)))), + Some(lit(true)), )? .build()?; diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 91044207c4e11..6bf878ab3df81 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -250,10 +250,10 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::{Column, DFSchema, JoinType, ScalarValue}; + use datafusion_common::{Column, DFSchema, JoinType}; use datafusion_expr::logical_plan::table_scan; use datafusion_expr::{ - binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, Expr, Operator, + binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, Operator, }; use crate::eliminate_filter::EliminateFilter; @@ -289,7 +289,7 @@ mod tests { #[test] fn propagate_empty() -> Result<()> { let plan = LogicalPlanBuilder::empty(false) - .filter(Expr::Literal(ScalarValue::Boolean(Some(true))))? + .filter(lit(true))? .limit(10, None)? .project(vec![binary_expr(lit(1), Operator::Plus, lit(1))])? .build()?; @@ -305,7 +305,7 @@ mod tests { let right_table_scan = test_table_scan_with_name("test2")?; let right = LogicalPlanBuilder::from(right_table_scan) .project(vec![col("a")])? - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let plan = LogicalPlanBuilder::from(left) @@ -325,7 +325,7 @@ mod tests { fn propagate_union_empty() -> Result<()> { let left = LogicalPlanBuilder::from(test_table_scan()?).build()?; let right = LogicalPlanBuilder::from(test_table_scan_with_name("test2")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let plan = LogicalPlanBuilder::from(left).union(right)?.build()?; @@ -339,10 +339,10 @@ mod tests { let one = LogicalPlanBuilder::from(test_table_scan_with_name("test1")?).build()?; let two = LogicalPlanBuilder::from(test_table_scan_with_name("test2")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let three = LogicalPlanBuilder::from(test_table_scan_with_name("test3")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let four = LogicalPlanBuilder::from(test_table_scan_with_name("test4")?).build()?; @@ -362,16 +362,16 @@ mod tests { #[test] fn propagate_union_all_empty() -> Result<()> { let one = LogicalPlanBuilder::from(test_table_scan_with_name("test1")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let two = LogicalPlanBuilder::from(test_table_scan_with_name("test2")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? 
+ .filter(lit(false))? .build()?; let three = LogicalPlanBuilder::from(test_table_scan_with_name("test3")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let four = LogicalPlanBuilder::from(test_table_scan_with_name("test4")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let plan = LogicalPlanBuilder::from(one) @@ -389,7 +389,7 @@ mod tests { let one_schema = Schema::new(vec![Field::new("t1a", DataType::UInt32, false)]); let t1_scan = table_scan(Some("test1"), &one_schema, None)?.build()?; let one = LogicalPlanBuilder::from(t1_scan) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let two_schema = Schema::new(vec![Field::new("t2a", DataType::UInt32, false)]); @@ -415,7 +415,7 @@ mod tests { fn propagate_union_alias() -> Result<()> { let left = LogicalPlanBuilder::from(test_table_scan()?).build()?; let right = LogicalPlanBuilder::from(test_table_scan_with_name("test2")?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let plan = LogicalPlanBuilder::from(left).union(right)?.build()?; @@ -449,7 +449,7 @@ mod tests { let left_table_scan = test_table_scan()?; LogicalPlanBuilder::from(left_table_scan) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build() } else { let scan = test_table_scan_with_name("left").unwrap(); @@ -460,7 +460,7 @@ mod tests { let right_table_scan = test_table_scan_with_name("right")?; LogicalPlanBuilder::from(right_table_scan) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build() } else { let scan = test_table_scan_with_name("right").unwrap(); @@ -487,14 +487,14 @@ mod tests { let (left, right, join_type, expected) = if anti_left_join { let left = test_table_scan()?; let right = LogicalPlanBuilder::from(test_table_scan()?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let expected = left.display_indent().to_string(); (left, right, JoinType::LeftAnti, expected) } else { let right = test_table_scan()?; let left = LogicalPlanBuilder::from(test_table_scan()?) - .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .filter(lit(false))? .build()?; let expected = right.display_indent().to_string(); (left, right, JoinType::RightAnti, expected) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 979a1499d0dee..c45df74a564da 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -817,7 +817,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { op: Or, right, }) if is_not_of(&right, &left) && !info.nullable(&left)? => { - Transformed::yes(Expr::Literal(ScalarValue::Boolean(Some(true)))) + Transformed::yes(lit(true)) } // !A OR A ---> true (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -825,7 +825,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { op: Or, right, }) if is_not_of(&left, &right) && !info.nullable(&right)? => { - Transformed::yes(Expr::Literal(ScalarValue::Boolean(Some(true)))) + Transformed::yes(lit(true)) } // (..A..) OR A --> (..A..) 
Expr::BinaryExpr(BinaryExpr { @@ -890,7 +890,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { op: And, right, }) if is_not_of(&right, &left) && !info.nullable(&left)? => { - Transformed::yes(Expr::Literal(ScalarValue::Boolean(Some(false)))) + Transformed::yes(lit(false)) } // !A AND A ---> false (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -898,7 +898,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { op: And, right, }) if is_not_of(&left, &right) && !info.nullable(&right)? => { - Transformed::yes(Expr::Literal(ScalarValue::Boolean(Some(false)))) + Transformed::yes(lit(false)) } // (..A..) AND A --> (..A..) Expr::BinaryExpr(BinaryExpr { diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index a975f0c6ef836..d015f545bf9d8 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -31,7 +31,9 @@ use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction}; use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::var_provider::VarType; -use datafusion_expr::{binary_expr, Between, BinaryExpr, Expr, Like, Operator, TryCast}; +use datafusion_expr::{ + binary_expr, lit, Between, BinaryExpr, Expr, Like, Operator, TryCast, +}; /// [PhysicalExpr] evaluate DataFusion expressions such as `A + 1`, or `CAST(c1 /// AS int)`. @@ -140,32 +142,26 @@ pub fn create_physical_expr( let binary_op = binary_expr( expr.as_ref().clone(), Operator::IsNotDistinctFrom, - Expr::Literal(ScalarValue::Boolean(Some(true))), + lit(true), ); create_physical_expr(&binary_op, input_dfschema, execution_props) } Expr::IsNotTrue(expr) => { - let binary_op = binary_expr( - expr.as_ref().clone(), - Operator::IsDistinctFrom, - Expr::Literal(ScalarValue::Boolean(Some(true))), - ); + let binary_op = + binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(true)); create_physical_expr(&binary_op, input_dfschema, execution_props) } Expr::IsFalse(expr) => { let binary_op = binary_expr( expr.as_ref().clone(), Operator::IsNotDistinctFrom, - Expr::Literal(ScalarValue::Boolean(Some(false))), + lit(false), ); create_physical_expr(&binary_op, input_dfschema, execution_props) } Expr::IsNotFalse(expr) => { - let binary_op = binary_expr( - expr.as_ref().clone(), - Operator::IsDistinctFrom, - Expr::Literal(ScalarValue::Boolean(Some(false))), - ); + let binary_op = + binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(false)); create_physical_expr(&binary_op, input_dfschema, execution_props) } Expr::IsUnknown(expr) => { From 79fa6f9098be9a6e5b269cd3642694765b230ff1 Mon Sep 17 00:00:00 2001 From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Date: Sat, 10 Aug 2024 10:03:02 +0300 Subject: [PATCH 262/357] Enforce sorting handle fetchable operators, add option to repartition based on row count estimates (#11875) * Tmp * Minor changes * Minor changes * Minor changes * Implement top down recursion with delete check * Minor changes * Minor changes * Address reviews * Update comments * Minor changes * Make test deterministic * Add fetch info to the statistics * Enforce distribution use inexact count estimate also. 
* Minor changes * Minor changes * Minor changes * Do not add unnecessary hash partitioning * Minor changes * Add config option to use inexact row number estimates during planning * Update config * Minor changes * Minor changes * Final review * Address reviews * Add handling for sort removal with fetch * Fix linter errors * Minor changes * Update config * Cleanup stats under fetch * Update SLT comment --------- Co-authored-by: Mehmet Ozan Kabak --- datafusion/common/src/config.rs | 8 + datafusion/common/src/stats.rs | 122 ++++++++++-- datafusion/core/src/dataframe/mod.rs | 12 +- datafusion/core/src/datasource/statistics.rs | 2 +- .../enforce_distribution.rs | 148 ++++++++++++-- .../src/physical_optimizer/enforce_sorting.rs | 180 ++++++++++++++++-- .../src/physical_optimizer/sort_pushdown.rs | 125 +++++++++--- .../physical-plan/src/coalesce_batches.rs | 6 +- .../physical-plan/src/execution_plan.rs | 5 + datafusion/physical-plan/src/filter.rs | 2 +- datafusion/physical-plan/src/limit.rs | 153 +++------------ datafusion/physical-plan/src/sorts/sort.rs | 6 +- .../test_files/count_star_rule.slt | 6 +- .../sqllogictest/test_files/group_by.slt | 15 +- .../test_files/information_schema.slt | 2 + datafusion/sqllogictest/test_files/limit.slt | 9 +- datafusion/sqllogictest/test_files/order.slt | 41 ++++ .../test_files/sort_merge_join.slt | 12 +- datafusion/sqllogictest/test_files/union.slt | 30 ++- datafusion/sqllogictest/test_files/window.slt | 22 +-- docs/source/user-guide/configs.md | 1 + 21 files changed, 643 insertions(+), 264 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index b5204b343f055..c48845c061e71 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -333,6 +333,14 @@ config_namespace! { /// Number of input rows partial aggregation partition should process, before /// aggregation ratio check and trying to switch to skipping aggregation mode pub skip_partial_aggregation_probe_rows_threshold: usize, default = 100_000 + + /// Should DataFusion use row number estimates at the input to decide + /// whether increasing parallelism is beneficial or not. By default, + /// only exact row numbers (not estimates) are used for this decision. + /// Setting this flag to `true` will likely produce better plans. + /// if the source of statistics is accurate. + /// We plan to make this the default in the future. + pub use_row_number_estimates_to_optimize_partitioning: bool, default = false } } diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 6cefef8d0eb58..d6b5310581d72 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -19,9 +19,9 @@ use std::fmt::{self, Debug, Display}; -use crate::ScalarValue; +use crate::{Result, ScalarValue}; -use arrow_schema::Schema; +use arrow_schema::{Schema, SchemaRef}; /// Represents a value with a degree of certainty. `Precision` is used to /// propagate information the precision of statistical values. @@ -247,21 +247,96 @@ impl Statistics { /// If the exactness of a [`Statistics`] instance is lost, this function relaxes /// the exactness of all information by converting them [`Precision::Inexact`]. 
- pub fn into_inexact(self) -> Self { - Statistics { - num_rows: self.num_rows.to_inexact(), - total_byte_size: self.total_byte_size.to_inexact(), - column_statistics: self - .column_statistics - .into_iter() - .map(|cs| ColumnStatistics { - null_count: cs.null_count.to_inexact(), - max_value: cs.max_value.to_inexact(), - min_value: cs.min_value.to_inexact(), - distinct_count: cs.distinct_count.to_inexact(), - }) - .collect::>(), + pub fn to_inexact(mut self) -> Self { + self.num_rows = self.num_rows.to_inexact(); + self.total_byte_size = self.total_byte_size.to_inexact(); + self.column_statistics = self + .column_statistics + .into_iter() + .map(|s| s.to_inexact()) + .collect(); + self + } + + /// Calculates the statistics after `fetch` and `skip` operations apply. + /// Here, `self` denotes per-partition statistics. Use the `n_partitions` + /// parameter to compute global statistics in a multi-partition setting. + pub fn with_fetch( + mut self, + schema: SchemaRef, + fetch: Option, + skip: usize, + n_partitions: usize, + ) -> Result { + let fetch_val = fetch.unwrap_or(usize::MAX); + + self.num_rows = match self { + Statistics { + num_rows: Precision::Exact(nr), + .. + } + | Statistics { + num_rows: Precision::Inexact(nr), + .. + } => { + // Here, the inexact case gives us an upper bound on the number of rows. + if nr <= skip { + // All input data will be skipped: + Precision::Exact(0) + } else if nr <= fetch_val && skip == 0 { + // If the input does not reach the `fetch` globally, and `skip` + // is zero (meaning the input and output are identical), return + // input stats as is. + // TODO: Can input stats still be used, but adjusted, when `skip` + // is non-zero? + return Ok(self); + } else if nr - skip <= fetch_val { + // After `skip` input rows are skipped, the remaining rows are + // less than or equal to the `fetch` values, so `num_rows` must + // equal the remaining rows. + check_num_rows( + (nr - skip).checked_mul(n_partitions), + // We know that we have an estimate for the number of rows: + self.num_rows.is_exact().unwrap(), + ) + } else { + // At this point we know that we were given a `fetch` value + // as the `None` case would go into the branch above. Since + // the input has more rows than `fetch + skip`, the number + // of rows will be the `fetch`, but we won't be able to + // predict the other statistics. + check_num_rows( + fetch_val.checked_mul(n_partitions), + // We know that we have an estimate for the number of rows: + self.num_rows.is_exact().unwrap(), + ) + } + } + Statistics { + num_rows: Precision::Absent, + .. + } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false), + }; + self.column_statistics = Statistics::unknown_column(&schema); + self.total_byte_size = Precision::Absent; + Ok(self) + } +} + +/// Creates an estimate of the number of rows in the output using the given +/// optional value and exactness flag. +fn check_num_rows(value: Option, is_exact: bool) -> Precision { + if let Some(value) = value { + if is_exact { + Precision::Exact(value) + } else { + // If the input stats are inexact, so are the output stats. + Precision::Inexact(value) } + } else { + // If the estimate is not available (e.g. due to an overflow), we can + // not produce a reliable estimate. + Precision::Absent } } @@ -336,14 +411,25 @@ impl ColumnStatistics { } /// Returns a [`ColumnStatistics`] instance having all [`Precision::Absent`] parameters. 
- pub fn new_unknown() -> ColumnStatistics { - ColumnStatistics { + pub fn new_unknown() -> Self { + Self { null_count: Precision::Absent, max_value: Precision::Absent, min_value: Precision::Absent, distinct_count: Precision::Absent, } } + + /// If the exactness of a [`ColumnStatistics`] instance is lost, this + /// function relaxes the exactness of all information by converting them + /// [`Precision::Inexact`]. + pub fn to_inexact(mut self) -> Self { + self.null_count = self.null_count.to_inexact(); + self.max_value = self.max_value.to_inexact(); + self.min_value = self.min_value.to_inexact(); + self.distinct_count = self.distinct_count.to_inexact(); + self + } } #[cfg(test)] diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 5fa65cb0da428..25a8d1c87f004 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -3000,13 +3000,13 @@ mod tests { .await? .select_columns(&["c1", "c2", "c3"])? .filter(col("c2").eq(lit(3)).and(col("c1").eq(lit("a"))))? - .limit(0, Some(1))? .sort(vec![ // make the test deterministic col("c1").sort(true, true), col("c2").sort(true, true), col("c3").sort(true, true), ])? + .limit(0, Some(1))? .with_column("sum", col("c2") + col("c3"))?; let df_sum_renamed = df @@ -3022,11 +3022,11 @@ mod tests { assert_batches_sorted_eq!( [ - "+-----+-----+----+-------+", - "| one | two | c3 | total |", - "+-----+-----+----+-------+", - "| a | 3 | 13 | 16 |", - "+-----+-----+----+-------+" + "+-----+-----+-----+-------+", + "| one | two | c3 | total |", + "+-----+-----+-----+-------+", + "| a | 3 | -72 | -69 |", + "+-----+-----+-----+-------+", ], &df_sum_renamed ); diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 9d031a6bbc858..6697558776802 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -138,7 +138,7 @@ pub async fn get_statistics_with_limit( // If we still have files in the stream, it means that the limit kicked // in, and the statistic could have been different had we processed the // files in a different order. - statistics = statistics.into_inexact() + statistics = statistics.to_inexact() } Ok((result_files, statistics)) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 1f076e448e600..2ee5624c83dd1 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -44,6 +44,7 @@ use crate::physical_plan::windows::WindowAggExec; use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning}; use arrow::compute::SortOptions; +use datafusion_common::stats::Precision; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_expr::logical_plan::JoinType; use datafusion_physical_expr::expressions::{Column, NoOp}; @@ -1031,6 +1032,105 @@ fn replace_order_preserving_variants( context.update_plan_from_children() } +/// A struct to keep track of repartition requirements for each child node. +struct RepartitionRequirementStatus { + /// The distribution requirement for the node. + requirement: Distribution, + /// Designates whether round robin partitioning is theoretically beneficial; + /// i.e. the operator can actually utilize parallelism. 
+ roundrobin_beneficial: bool, + /// Designates whether round robin partitioning is beneficial according to + /// the statistical information we have on the number of rows. + roundrobin_beneficial_stats: bool, + /// Designates whether hash partitioning is necessary. + hash_necessary: bool, +} + +/// Calculates the `RepartitionRequirementStatus` for each children to generate +/// consistent and sensible (in terms of performance) distribution requirements. +/// As an example, a hash join's left (build) child might produce +/// +/// ```text +/// RepartitionRequirementStatus { +/// .., +/// hash_necessary: true +/// } +/// ``` +/// +/// while its right (probe) child might have very few rows and produce: +/// +/// ```text +/// RepartitionRequirementStatus { +/// .., +/// hash_necessary: false +/// } +/// ``` +/// +/// These statuses are not consistent as all children should agree on hash +/// partitioning. This function aligns the statuses to generate consistent +/// hash partitions for each children. After alignment, the right child's +/// status would turn into: +/// +/// ```text +/// RepartitionRequirementStatus { +/// .., +/// hash_necessary: true +/// } +/// ``` +fn get_repartition_requirement_status( + plan: &Arc, + batch_size: usize, + should_use_estimates: bool, +) -> Result> { + let mut needs_alignment = false; + let children = plan.children(); + let rr_beneficial = plan.benefits_from_input_partitioning(); + let requirements = plan.required_input_distribution(); + let mut repartition_status_flags = vec![]; + for (child, requirement, roundrobin_beneficial) in + izip!(children.into_iter(), requirements, rr_beneficial) + { + // Decide whether adding a round robin is beneficial depending on + // the statistical information we have on the number of rows: + let roundrobin_beneficial_stats = match child.statistics()?.num_rows { + Precision::Exact(n_rows) => n_rows > batch_size, + Precision::Inexact(n_rows) => !should_use_estimates || (n_rows > batch_size), + Precision::Absent => true, + }; + let is_hash = matches!(requirement, Distribution::HashPartitioned(_)); + // Hash re-partitioning is necessary when the input has more than one + // partitions: + let multi_partitions = child.output_partitioning().partition_count() > 1; + let roundrobin_sensible = roundrobin_beneficial && roundrobin_beneficial_stats; + needs_alignment |= is_hash && (multi_partitions || roundrobin_sensible); + repartition_status_flags.push(( + is_hash, + RepartitionRequirementStatus { + requirement, + roundrobin_beneficial, + roundrobin_beneficial_stats, + hash_necessary: is_hash && multi_partitions, + }, + )); + } + // Align hash necessary flags for hash partitions to generate consistent + // hash partitions at each children: + if needs_alignment { + // When there is at least one hash requirement that is necessary or + // beneficial according to statistics, make all children require hash + // repartitioning: + for (is_hash, status) in &mut repartition_status_flags { + if *is_hash { + status.hash_necessary = true; + } + } + } + Ok(repartition_status_flags + .into_iter() + .map(|(_, status)| status) + .collect()) +} + /// This function checks whether we need to add additional data exchange /// operators to satisfy distribution requirements. 
Since this function /// takes care of such requirements, we should avoid manually adding data @@ -1050,6 +1150,9 @@ fn ensure_distribution( let enable_round_robin = config.optimizer.enable_round_robin_repartition; let repartition_file_scans = config.optimizer.repartition_file_scans; let batch_size = config.execution.batch_size; + let should_use_estimates = config + .execution + .use_row_number_estimates_to_optimize_partitioning; let is_unbounded = dist_context.plan.execution_mode().is_unbounded(); // Use order preserving variants either of the conditions true // - it is desired according to config @@ -1082,6 +1185,8 @@ fn ensure_distribution( } }; + let repartition_status_flags = + get_repartition_requirement_status(&plan, batch_size, should_use_estimates)?; // This loop iterates over all the children to: // - Increase parallelism for every child if it is beneficial. // - Satisfy the distribution requirements of every child, if it is not @@ -1089,33 +1194,32 @@ fn ensure_distribution( // We store the updated children in `new_children`. let children = izip!( children.into_iter(), - plan.required_input_distribution().iter(), plan.required_input_ordering().iter(), - plan.benefits_from_input_partitioning(), - plan.maintains_input_order() + plan.maintains_input_order(), + repartition_status_flags.into_iter() ) .map( - |(mut child, requirement, required_input_ordering, would_benefit, maintains)| { - // Don't need to apply when the returned row count is not greater than batch size - let num_rows = child.plan.statistics()?.num_rows; - let repartition_beneficial_stats = if num_rows.is_exact().unwrap_or(false) { - num_rows - .get_value() - .map(|value| value > &batch_size) - .unwrap() // safe to unwrap since is_exact() is true - } else { - true - }; - + |( + mut child, + required_input_ordering, + maintains, + RepartitionRequirementStatus { + requirement, + roundrobin_beneficial, + roundrobin_beneficial_stats, + hash_necessary, + }, + )| { let add_roundrobin = enable_round_robin // Operator benefits from partitioning (e.g. filter): - && (would_benefit && repartition_beneficial_stats) + && roundrobin_beneficial + && roundrobin_beneficial_stats // Unless partitioning increases the partition count, it is not beneficial: && child.plan.output_partitioning().partition_count() < target_partitions; // When `repartition_file_scans` is set, attempt to increase // parallelism at the source. - if repartition_file_scans && repartition_beneficial_stats { + if repartition_file_scans && roundrobin_beneficial_stats { if let Some(new_child) = child.plan.repartitioned(target_partitions, config)? { @@ -1124,7 +1228,7 @@ fn ensure_distribution( } // Satisfy the distribution requirement if it is unmet. - match requirement { + match &requirement { Distribution::SinglePartition => { child = add_spm_on_top(child); } @@ -1134,7 +1238,11 @@ fn ensure_distribution( // to increase parallelism. child = add_roundrobin_on_top(child, target_partitions)?; } - child = add_hash_on_top(child, exprs.to_vec(), target_partitions)?; + // When inserting hash is necessary to satisy hash requirement, insert hash repartition. 
+ if hash_necessary { + child = + add_hash_on_top(child, exprs.to_vec(), target_partitions)?; + } } Distribution::UnspecifiedDistribution => { if add_roundrobin { @@ -1731,6 +1839,8 @@ pub(crate) mod tests { config.optimizer.repartition_file_min_size = $REPARTITION_FILE_MIN_SIZE; config.optimizer.prefer_existing_sort = $PREFER_EXISTING_SORT; config.optimizer.prefer_existing_union = $PREFER_EXISTING_UNION; + // Use a small batch size, to trigger RoundRobin in tests + config.execution.batch_size = 1; // NOTE: These tests verify the joint `EnforceDistribution` + `EnforceSorting` cascade // because they were written prior to the separation of `BasicEnforcement` into diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index faf8d01a97fd9..76df99b82c538 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -61,7 +61,8 @@ use crate::physical_plan::{Distribution, ExecutionPlan, InputOrderMode}; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::{Partitioning, PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion_physical_plan::limit::LocalLimitExec; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; use datafusion_physical_plan::ExecutionPlanProperties; @@ -189,7 +190,7 @@ impl PhysicalOptimizerRule for EnforceSorting { // missed by the bottom-up traversal: let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); assign_initial_requirements(&mut sort_pushdown); - let adjusted = sort_pushdown.transform_down(pushdown_sorts)?.data; + let adjusted = pushdown_sorts(sort_pushdown)?; adjusted .plan @@ -281,7 +282,7 @@ fn parallelize_sorts( // executors don't require single partition), then we can replace // the `CoalescePartitionsExec` + `SortExec` cascade with a `SortExec` // + `SortPreservingMergeExec` cascade to parallelize sorting. - requirements = remove_corresponding_coalesce_in_sub_plan(requirements)?; + requirements = remove_bottleneck_in_subplan(requirements)?; // We also need to remove the self node since `remove_corresponding_coalesce_in_sub_plan` // deals with the children and their children and so on. requirements = requirements.children.swap_remove(0); @@ -299,7 +300,7 @@ fn parallelize_sorts( } else if is_coalesce_partitions(&requirements.plan) { // There is an unnecessary `CoalescePartitionsExec` in the plan. // This will handle the recursive `CoalescePartitionsExec` plans. - requirements = remove_corresponding_coalesce_in_sub_plan(requirements)?; + requirements = remove_bottleneck_in_subplan(requirements)?; // For the removal of self node which is also a `CoalescePartitionsExec`. 
requirements = requirements.children.swap_remove(0); @@ -402,7 +403,12 @@ fn analyze_immediate_sort_removal( } else { // Remove the sort: node.children = node.children.swap_remove(0).children; - sort_input.clone() + if let Some(fetch) = sort_exec.fetch() { + // If the sort has a fetch, we need to add a limit: + Arc::new(LocalLimitExec::new(sort_input.clone(), fetch)) + } else { + sort_input.clone() + } }; for child in node.children.iter_mut() { child.data = false; @@ -484,8 +490,11 @@ fn adjust_window_sort_removal( Ok(window_tree) } -/// Removes the [`CoalescePartitionsExec`] from the plan in `node`. -fn remove_corresponding_coalesce_in_sub_plan( +/// Removes parallelization-reducing, avoidable [`CoalescePartitionsExec`]s from +/// the plan in `node`. After the removal of such `CoalescePartitionsExec`s from +/// the plan, some of the remaining `RepartitionExec`s might become unnecessary. +/// Removes such `RepartitionExec`s from the plan as well. +fn remove_bottleneck_in_subplan( mut requirements: PlanWithCorrespondingCoalescePartitions, ) -> Result { let plan = &requirements.plan; @@ -506,15 +515,27 @@ fn remove_corresponding_coalesce_in_sub_plan( .into_iter() .map(|node| { if node.data { - remove_corresponding_coalesce_in_sub_plan(node) + remove_bottleneck_in_subplan(node) } else { Ok(node) } }) .collect::>()?; } - - requirements.update_plan_from_children() + let mut new_reqs = requirements.update_plan_from_children()?; + if let Some(repartition) = new_reqs.plan.as_any().downcast_ref::() { + let input_partitioning = repartition.input().output_partitioning(); + // We can remove this repartitioning operator if it is now a no-op: + let mut can_remove = input_partitioning.eq(repartition.partitioning()); + // We can also remove it if we ended up with an ineffective RR: + if let Partitioning::RoundRobinBatch(n_out) = repartition.partitioning() { + can_remove |= *n_out == input_partitioning.partition_count(); + } + if can_remove { + new_reqs = new_reqs.children.swap_remove(0) + } + } + Ok(new_reqs) } /// Updates child to remove the unnecessary sort below it. @@ -540,8 +561,11 @@ fn remove_corresponding_sort_from_sub_plan( requires_single_partition: bool, ) -> Result { // A `SortExec` is always at the bottom of the tree. - if is_sort(&node.plan) { - node = node.children.swap_remove(0); + if let Some(sort_exec) = node.plan.as_any().downcast_ref::() { + // Do not remove sorts with fetch: + if sort_exec.fetch().is_none() { + node = node.children.swap_remove(0); + } } else { let mut any_connection = false; let required_dist = node.plan.required_input_distribution(); @@ -632,8 +656,9 @@ mod tests { use datafusion_common::Result; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::{col, Column, NotExpr}; - use datafusion_physical_optimizer::PhysicalOptimizerRule; + use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; + use rstest::rstest; fn create_test_schema() -> Result { @@ -716,10 +741,7 @@ mod tests { let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); assign_initial_requirements(&mut sort_pushdown); - sort_pushdown - .transform_down(pushdown_sorts) - .data() - .and_then(check_integrity)?; + check_integrity(pushdown_sorts(sort_pushdown)?)?; // TODO: End state payloads will be checked here. 
} @@ -1049,6 +1071,130 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_remove_unnecessary_sort6() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let input = Arc::new( + SortExec::new(vec![sort_expr("non_nullable_col", &schema)], source) + .with_fetch(Some(2)), + ); + let physical_plan = sort_exec( + vec![ + sort_expr("non_nullable_col", &schema), + sort_expr("nullable_col", &schema), + ], + input, + ); + + let expected_input = [ + "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort7() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let input = Arc::new(SortExec::new( + vec![ + sort_expr("non_nullable_col", &schema), + sort_expr("nullable_col", &schema), + ], + source, + )); + + let physical_plan = Arc::new( + SortExec::new(vec![sort_expr("non_nullable_col", &schema)], input) + .with_fetch(Some(2)), + ) as Arc; + + let expected_input = [ + "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "LocalLimitExec: fetch=2", + " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort8() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let input = Arc::new(SortExec::new( + vec![sort_expr("non_nullable_col", &schema)], + source, + )); + let limit = Arc::new(LocalLimitExec::new(input, 2)); + let physical_plan = sort_exec( + vec![ + sort_expr("non_nullable_col", &schema), + sort_expr("nullable_col", &schema), + ], + limit, + ); + + let expected_input = [ + "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " LocalLimitExec: fetch=2", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "LocalLimitExec: fetch=2", + " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_do_not_pushdown_through_limit() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + // let input = sort_exec(vec![sort_expr("non_nullable_col", &schema)], source); + let input = Arc::new(SortExec::new( + vec![sort_expr("non_nullable_col", &schema)], + source, + )); + let limit = Arc::new(GlobalLimitExec::new(input, 0, Some(5))) as _; + let physical_plan = 
sort_exec(vec![sort_expr("nullable_col", &schema)], limit); + + let expected_input = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " GlobalLimitExec: skip=0, fetch=5", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " GlobalLimitExec: skip=0, fetch=5", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + #[tokio::test] async fn test_remove_unnecessary_spm1() -> Result<()> { let schema = create_test_schema()?; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 3577e109b0697..17d63a06a6f8c 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. +use std::fmt::Debug; use std::sync::Arc; -use super::utils::add_sort_above; -use crate::physical_optimizer::utils::{ - is_limit, is_sort_preserving_merge, is_union, is_window, -}; +use super::utils::{add_sort_above, is_sort}; +use crate::physical_optimizer::utils::{is_sort_preserving_merge, is_union, is_window}; use crate::physical_plan::filter::FilterExec; use crate::physical_plan::joins::utils::calculate_join_output_ordering; use crate::physical_plan::joins::{HashJoinExec, SortMergeJoinExec}; @@ -30,7 +29,7 @@ use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::tree_node::PlanContext; use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; -use datafusion_common::tree_node::Transformed; +use datafusion_common::tree_node::{ConcreteTreeNode, Transformed, TreeNodeRecursion}; use datafusion_common::{plan_err, JoinSide, Result}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; @@ -41,38 +40,63 @@ use datafusion_physical_expr::{ /// This is a "data class" we use within the [`EnforceSorting`] rule to push /// down [`SortExec`] in the plan. In some cases, we can reduce the total /// computational cost by pushing down `SortExec`s through some executors. The -/// object carries the parent required ordering as its data. +/// object carries the parent required ordering and the (optional) `fetch` value +/// of the parent node as its data. /// /// [`EnforceSorting`]: crate::physical_optimizer::enforce_sorting::EnforceSorting -pub type SortPushDown = PlanContext>>; +#[derive(Default, Clone)] +pub struct ParentRequirements { + ordering_requirement: Option>, + fetch: Option, +} + +pub type SortPushDown = PlanContext; /// Assigns the ordering requirement of the root node to the its children. 
pub fn assign_initial_requirements(node: &mut SortPushDown) { let reqs = node.plan.required_input_ordering(); for (child, requirement) in node.children.iter_mut().zip(reqs) { - child.data = requirement; + child.data = ParentRequirements { + ordering_requirement: requirement, + fetch: None, + }; + } +} + +pub(crate) fn pushdown_sorts(sort_pushdown: SortPushDown) -> Result { + let mut new_node = pushdown_sorts_helper(sort_pushdown)?; + while new_node.tnr == TreeNodeRecursion::Stop { + new_node = pushdown_sorts_helper(new_node.data)?; } + let (new_node, children) = new_node.data.take_children(); + let new_children = children + .into_iter() + .map(pushdown_sorts) + .collect::>()?; + new_node.with_new_children(new_children) } -pub(crate) fn pushdown_sorts( +fn pushdown_sorts_helper( mut requirements: SortPushDown, ) -> Result> { let plan = &requirements.plan; - let parent_reqs = requirements.data.as_deref().unwrap_or(&[]); + let parent_reqs = requirements + .data + .ordering_requirement + .as_deref() + .unwrap_or(&[]); let satisfy_parent = plan .equivalence_properties() .ordering_satisfy_requirement(parent_reqs); - - if let Some(sort_exec) = plan.as_any().downcast_ref::() { + if is_sort(plan) { let required_ordering = plan .output_ordering() .map(PhysicalSortRequirement::from_sort_exprs) .unwrap_or_default(); - if !satisfy_parent { // Make sure this `SortExec` satisfies parent requirements: - let fetch = sort_exec.fetch(); - let sort_reqs = requirements.data.unwrap_or_default(); + let sort_reqs = requirements.data.ordering_requirement.unwrap_or_default(); + let fetch = requirements.data.fetch; requirements = requirements.children.swap_remove(0); requirements = add_sort_above(requirements, sort_reqs, fetch); }; @@ -82,12 +106,24 @@ pub(crate) fn pushdown_sorts( if let Some(adjusted) = pushdown_requirement_to_children(&child.plan, &required_ordering)? { + let fetch = child.plan.fetch(); for (grand_child, order) in child.children.iter_mut().zip(adjusted) { - grand_child.data = order; + grand_child.data = ParentRequirements { + ordering_requirement: order, + fetch, + }; } // Can push down requirements - child.data = None; - return Ok(Transformed::yes(child)); + child.data = ParentRequirements { + ordering_requirement: Some(required_ordering), + fetch, + }; + + return Ok(Transformed { + data: child, + transformed: true, + tnr: TreeNodeRecursion::Stop, + }); } else { // Can not push down requirements requirements.children = vec![child]; @@ -97,19 +133,24 @@ pub(crate) fn pushdown_sorts( // For non-sort operators, immediately return if parent requirements are met: let reqs = plan.required_input_ordering(); for (child, order) in requirements.children.iter_mut().zip(reqs) { - child.data = order; + child.data.ordering_requirement = order; } } else if let Some(adjusted) = pushdown_requirement_to_children(plan, parent_reqs)? 
{ // Can not satisfy the parent requirements, check whether we can push // requirements down: for (child, order) in requirements.children.iter_mut().zip(adjusted) { - child.data = order; + child.data.ordering_requirement = order; } - requirements.data = None; + requirements.data.ordering_requirement = None; } else { // Can not push down requirements, add new `SortExec`: - let sort_reqs = requirements.data.clone().unwrap_or_default(); - requirements = add_sort_above(requirements, sort_reqs, None); + let sort_reqs = requirements + .data + .ordering_requirement + .clone() + .unwrap_or_default(); + let fetch = requirements.data.fetch; + requirements = add_sort_above(requirements, sort_reqs, fetch); assign_initial_requirements(&mut requirements); } Ok(Transformed::yes(requirements)) @@ -132,6 +173,43 @@ fn pushdown_requirement_to_children( RequirementsCompatibility::Compatible(adjusted) => Ok(Some(vec![adjusted])), RequirementsCompatibility::NonCompatible => Ok(None), } + } else if let Some(sort_exec) = plan.as_any().downcast_ref::() { + let sort_req = PhysicalSortRequirement::from_sort_exprs( + sort_exec.properties().output_ordering().unwrap_or(&[]), + ); + if sort_exec + .properties() + .eq_properties + .requirements_compatible(parent_required, &sort_req) + { + debug_assert!(!parent_required.is_empty()); + Ok(Some(vec![Some(parent_required.to_vec())])) + } else { + Ok(None) + } + } else if plan.fetch().is_some() + && plan.supports_limit_pushdown() + && plan + .maintains_input_order() + .iter() + .all(|maintain| *maintain) + { + let output_req = PhysicalSortRequirement::from_sort_exprs( + plan.properties().output_ordering().unwrap_or(&[]), + ); + // Push down through operator with fetch when: + // - requirement is aligned with output ordering + // - it preserves ordering during execution + if plan + .properties() + .eq_properties + .requirements_compatible(parent_required, &output_req) + { + let req = (!parent_required.is_empty()).then(|| parent_required.to_vec()); + Ok(Some(vec![req])) + } else { + Ok(None) + } } else if is_union(plan) { // UnionExec does not have real sort requirements for its input. Here we change the adjusted_request_ordering to UnionExec's output ordering and // propagate the sort requirements down to correct the unnecessary descendant SortExec under the UnionExec @@ -174,7 +252,6 @@ fn pushdown_requirement_to_children( || plan.as_any().is::() // TODO: Add support for Projection push down || plan.as_any().is::() - || is_limit(plan) || plan.as_any().is::() || pushdown_would_violate_requirements(parent_required, plan.as_ref()) { diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index de42a55ad3502..13c10c535c086 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -212,7 +212,7 @@ impl ExecutionPlan for CoalesceBatchesExec { } fn statistics(&self) -> Result { - self.input.statistics() + Statistics::with_fetch(self.input.statistics()?, self.schema(), self.fetch, 0, 1) } fn with_fetch(&self, limit: Option) -> Option> { @@ -224,6 +224,10 @@ impl ExecutionPlan for CoalesceBatchesExec { cache: self.cache.clone(), })) } + + fn fetch(&self) -> Option { + self.fetch + } } /// Stream for [`CoalesceBatchesExec`]. See [`CoalesceBatchesExec`] for more details. 
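
The `CoalesceBatchesExec` hunk above pairs its new `fetch()` accessor with the default
`ExecutionPlan::fetch()` method added in the next file, so optimizer passes can query any
operator's limit uniformly (the sort-pushdown hunk earlier in this patch reads
`child.plan.fetch()` for exactly that purpose). Below is a minimal standalone sketch of the
pattern, using simplified stand-in types rather than DataFusion's real `ExecutionPlan` trait:

    // Simplified model of the pattern introduced here: a default `fetch()`
    // returning `None`, which fetch-aware operators override.
    trait Operator {
        /// Gets the fetch count for the operator; `None` means there is no fetch.
        fn fetch(&self) -> Option<usize> {
            None
        }
    }

    struct Scan;                               // carries no limit of its own
    struct Coalesce { fetch: Option<usize> }   // carries an optional fetch

    impl Operator for Scan {}
    impl Operator for Coalesce {
        fn fetch(&self) -> Option<usize> {
            self.fetch
        }
    }

    fn main() {
        let plan: Vec<Box<dyn Operator>> = vec![
            Box::new(Scan),
            Box::new(Coalesce { fetch: Some(8192) }),
        ];
        // An optimizer pass can now treat all operators uniformly:
        for op in &plan {
            println!("fetch = {:?}", op.fetch());
        }
    }

Operators that carry no limit simply inherit the `None` default, which keeps the trait change
backwards compatible for existing implementations.
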
diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 5a3fc086c1f89..a6a15e46860cc 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -399,6 +399,11 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { fn with_fetch(&self, _limit: Option) -> Option> { None } + + /// Gets the fetch count for the operator, `None` means there is no fetch. + fn fetch(&self) -> Option { + None + } } /// Extension trait provides an easy API to fetch various properties of diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 69bcfefcd4764..fa9108057cfe3 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -126,7 +126,7 @@ impl FilterExec { let schema = input.schema(); if !check_support(predicate, &schema) { let selectivity = default_selectivity as f64 / 100.0; - let mut stats = input_stats.into_inexact(); + let mut stats = input_stats.to_inexact(); stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity); stats.total_byte_size = stats .total_byte_size diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index f3dad6afabdea..360e942226d24 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -31,7 +31,6 @@ use crate::{DisplayFormatType, Distribution, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::stats::Precision; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; @@ -185,80 +184,21 @@ impl ExecutionPlan for GlobalLimitExec { } fn statistics(&self) -> Result { - let input_stats = self.input.statistics()?; - let skip = self.skip; - let col_stats = Statistics::unknown_column(&self.schema()); - let fetch = self.fetch.unwrap_or(usize::MAX); - - let mut fetched_row_number_stats = Statistics { - num_rows: Precision::Exact(fetch), - column_statistics: col_stats.clone(), - total_byte_size: Precision::Absent, - }; + Statistics::with_fetch( + self.input.statistics()?, + self.schema(), + self.fetch, + self.skip, + 1, + ) + } - let stats = match input_stats { - Statistics { - num_rows: Precision::Exact(nr), - .. - } - | Statistics { - num_rows: Precision::Inexact(nr), - .. - } => { - if nr <= skip { - // if all input data will be skipped, return 0 - let mut skip_all_rows_stats = Statistics { - num_rows: Precision::Exact(0), - column_statistics: col_stats, - total_byte_size: Precision::Absent, - }; - if !input_stats.num_rows.is_exact().unwrap_or(false) { - // The input stats are inexact, so the output stats must be too. - skip_all_rows_stats = skip_all_rows_stats.into_inexact(); - } - skip_all_rows_stats - } else if nr <= fetch && self.skip == 0 { - // if the input does not reach the "fetch" globally, and "skip" is zero - // (meaning the input and output are identical), return input stats. - // Can input_stats still be used, but adjusted, in the "skip != 0" case? 
- input_stats - } else if nr - skip <= fetch { - // after "skip" input rows are skipped, the remaining rows are less than or equal to the - // "fetch" values, so `num_rows` must equal the remaining rows - let remaining_rows: usize = nr - skip; - let mut skip_some_rows_stats = Statistics { - num_rows: Precision::Exact(remaining_rows), - column_statistics: col_stats, - total_byte_size: Precision::Absent, - }; - if !input_stats.num_rows.is_exact().unwrap_or(false) { - // The input stats are inexact, so the output stats must be too. - skip_some_rows_stats = skip_some_rows_stats.into_inexact(); - } - skip_some_rows_stats - } else { - // if the input is greater than "fetch+skip", the num_rows will be the "fetch", - // but we won't be able to predict the other statistics - if !input_stats.num_rows.is_exact().unwrap_or(false) - || self.fetch.is_none() - { - // If the input stats are inexact, the output stats must be too. - // If the fetch value is `usize::MAX` because no LIMIT was specified, - // we also can't represent it as an exact value. - fetched_row_number_stats = - fetched_row_number_stats.into_inexact(); - } - fetched_row_number_stats - } - } - _ => { - // The result output `num_rows` will always be no greater than the limit number. - // Should `num_rows` be marked as `Absent` here when the `fetch` value is large, - // as the actual `num_rows` may be far away from the `fetch` value? - fetched_row_number_stats.into_inexact() - } - }; - Ok(stats) + fn fetch(&self) -> Option { + self.fetch + } + + fn supports_limit_pushdown(&self) -> bool { + true } } @@ -380,53 +320,21 @@ impl ExecutionPlan for LocalLimitExec { } fn statistics(&self) -> Result { - let input_stats = self.input.statistics()?; - let col_stats = Statistics::unknown_column(&self.schema()); - let stats = match input_stats { - // if the input does not reach the limit globally, return input stats - Statistics { - num_rows: Precision::Exact(nr), - .. - } - | Statistics { - num_rows: Precision::Inexact(nr), - .. - } if nr <= self.fetch => input_stats, - // if the input is greater than the limit, the num_row will be greater - // than the limit because the partitions will be limited separately - // the statistic - Statistics { - num_rows: Precision::Exact(nr), - .. - } if nr > self.fetch => Statistics { - num_rows: Precision::Exact(self.fetch), - // this is not actually exact, but will be when GlobalLimit is applied - // TODO stats: find a more explicit way to vehiculate this information - column_statistics: col_stats, - total_byte_size: Precision::Absent, - }, - Statistics { - num_rows: Precision::Inexact(nr), - .. 
- } if nr > self.fetch => Statistics { - num_rows: Precision::Inexact(self.fetch), - // this is not actually exact, but will be when GlobalLimit is applied - // TODO stats: find a more explicit way to vehiculate this information - column_statistics: col_stats, - total_byte_size: Precision::Absent, - }, - _ => Statistics { - // the result output row number will always be no greater than the limit number - num_rows: Precision::Inexact( - self.fetch - * self.properties().output_partitioning().partition_count(), - ), - - column_statistics: col_stats, - total_byte_size: Precision::Absent, - }, - }; - Ok(stats) + Statistics::with_fetch( + self.input.statistics()?, + self.schema(), + Some(self.fetch), + 0, + 1, + ) + } + + fn fetch(&self) -> Option { + Some(self.fetch) + } + + fn supports_limit_pushdown(&self) -> bool { + true } } @@ -565,6 +473,7 @@ mod tests { use crate::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use arrow_array::RecordBatchOptions; use arrow_schema::Schema; + use datafusion_common::stats::Precision; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalExpr; @@ -794,7 +703,7 @@ mod tests { let row_count = row_number_inexact_statistics_for_global_limit(400, Some(10)).await?; - assert_eq!(row_count, Precision::Inexact(0)); + assert_eq!(row_count, Precision::Exact(0)); let row_count = row_number_inexact_statistics_for_global_limit(398, Some(10)).await?; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index eb77d7716848c..e7e1c5481f807 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -921,7 +921,7 @@ impl ExecutionPlan for SortExec { } fn statistics(&self) -> Result { - self.input.statistics() + Statistics::with_fetch(self.input.statistics()?, self.schema(), self.fetch, 0, 1) } fn with_fetch(&self, limit: Option) -> Option> { @@ -934,6 +934,10 @@ impl ExecutionPlan for SortExec { cache: self.cache.clone(), })) } + + fn fetch(&self) -> Option { + self.fetch + } } #[cfg(test)] diff --git a/datafusion/sqllogictest/test_files/count_star_rule.slt b/datafusion/sqllogictest/test_files/count_star_rule.slt index 99d358ad17f02..b552e6053769a 100644 --- a/datafusion/sqllogictest/test_files/count_star_rule.slt +++ b/datafusion/sqllogictest/test_files/count_star_rule.slt @@ -86,10 +86,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[a@0 as a, count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as count_a] 02)--WindowAggExec: wdw=[count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------CoalesceBatchesExec: target_batch_size=8192 -05)--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 -06)----------MemoryExec: partitions=1, partition_sizes=[1] +03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------MemoryExec: partitions=1, partition_sizes=[1] query II SELECT a, COUNT() OVER (PARTITION BY a) AS count_a FROM t1 ORDER BY a; diff --git a/datafusion/sqllogictest/test_files/group_by.slt 
b/datafusion/sqllogictest/test_files/group_by.slt index bd096f61fb5da..a4a886c75a776 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2020,15 +2020,12 @@ physical_plan 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([col0@0, col1@1, col2@2], 4), input_partitions=4 07)------------AggregateExec: mode=Partial, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]] -08)--------------ProjectionExec: expr=[col0@2 as col0, col1@3 as col1, col2@4 as col2, col0@0 as col0, col1@1 as col1] -09)----------------CoalesceBatchesExec: target_batch_size=8192 -10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------RepartitionExec: partitioning=Hash([col0@0], 4), input_partitions=1 -13)------------------------MemoryExec: partitions=1, partition_sizes=[3] -14)--------------------CoalesceBatchesExec: target_batch_size=8192 -15)----------------------RepartitionExec: partitioning=Hash([col0@0], 4), input_partitions=1 -16)------------------------MemoryExec: partitions=1, partition_sizes=[3] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------ProjectionExec: expr=[col0@2 as col0, col1@3 as col1, col2@4 as col2, col0@0 as col0, col1@1 as col1] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)] +12)----------------------MemoryExec: partitions=1, partition_sizes=[3] +13)----------------------MemoryExec: partitions=1, partition_sizes=[3] # Columns in the table are a,b,c,d. Source is CsvExec which is ordered by # a,b,c column. Column a has cardinality 2, column b has cardinality 4. diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 0cbbbf3c608c0..ff793a72fd8a5 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -215,6 +215,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760 datafusion.execution.split_file_groups_by_statistics false datafusion.execution.target_partitions 7 datafusion.execution.time_zone +00:00 +datafusion.execution.use_row_number_estimates_to_optimize_partitioning false datafusion.explain.logical_plan_only false datafusion.explain.physical_plan_only false datafusion.explain.show_schema false @@ -304,6 +305,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserve datafusion.execution.split_file_groups_by_statistics false Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental datafusion.execution.target_partitions 7 Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour +datafusion.execution.use_row_number_estimates_to_optimize_partitioning false Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. 
By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future. datafusion.explain.logical_plan_only false When set to true, the explain statement will only print logical plans datafusion.explain.physical_plan_only false When set to true, the explain statement will only print physical plans datafusion.explain.show_schema false When set to true, the explain statement will print schema information diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index dc3d444854c4d..4cdd40ac8c34c 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -390,8 +390,8 @@ SELECT ROW_NUMBER() OVER (PARTITION BY t1.column1) FROM t t1, t t2, t t3; statement ok set datafusion.explain.show_sizes = false; -# verify that there are multiple partitions in the input (i.e. MemoryExec says -# there are 4 partitions) so that this tests multi-partition limit. +# verify that there are multiple partitions in the input so that this tests +# multi-partition limit. query TT EXPLAIN SELECT DISTINCT i FROM t1000; ---- @@ -402,8 +402,9 @@ physical_plan 01)AggregateExec: mode=FinalPartitioned, gby=[i@0 as i], aggr=[] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----RepartitionExec: partitioning=Hash([i@0], 4), input_partitions=4 -04)------AggregateExec: mode=Partial, gby=[i@0 as i], aggr=[] -05)--------MemoryExec: partitions=4 +04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[i@0 as i], aggr=[] +06)----------MemoryExec: partitions=1 statement ok set datafusion.explain.show_sizes = true; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 3382d5ddabda4..569602166b389 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -1148,3 +1148,44 @@ SELECT (SELECT c from ordered_table ORDER BY c LIMIT 1) UNION ALL (SELECT 23 as ---- 0 23 + +statement ok +set datafusion.execution.use_row_number_estimates_to_optimize_partitioning = true; + +# Do not increase the number of partitions after fetch one, as this will be unnecessary. +query TT +EXPLAIN SELECT a + b as sum1 FROM (SELECT a, b + FROM ordered_table + ORDER BY a ASC LIMIT 1 +); +---- +logical_plan +01)Projection: ordered_table.a + ordered_table.b AS sum1 +02)--Limit: skip=0, fetch=1 +03)----Sort: ordered_table.a ASC NULLS LAST, fetch=1 +04)------TableScan: ordered_table projection=[a, b] +physical_plan +01)ProjectionExec: expr=[a@0 + b@1 as sum1] +02)--SortExec: TopK(fetch=1), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] +03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true + +statement ok +set datafusion.execution.use_row_number_estimates_to_optimize_partitioning = false; + +# Here, we have multiple partitions after fetch one, since the row count estimate is not exact. 
+query TT +EXPLAIN SELECT a + b as sum1 FROM (SELECT a, b + FROM ordered_table + ORDER BY a ASC LIMIT 1 +); +---- +logical_plan +01)Projection: ordered_table.a + ordered_table.b AS sum1 +02)--Limit: skip=0, fetch=1 +03)----Sort: ordered_table.a ASC NULLS LAST, fetch=1 +04)------TableScan: ordered_table projection=[a, b] +physical_plan +01)ProjectionExec: expr=[a@0 + b@1 as sum1] +02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +03)----SortExec: TopK(fetch=1), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true diff --git a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt index 6e7b50973cde2..ea3088e69674d 100644 --- a/datafusion/sqllogictest/test_files/sort_merge_join.slt +++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt @@ -38,14 +38,10 @@ logical_plan 03)--TableScan: t2 projection=[a, b] physical_plan 01)SortMergeJoin: join_type=Inner, on=[(a@0, a@0)], filter=CAST(b@1 AS Int64) * 50 <= CAST(b@0 AS Int64) -02)--SortExec: expr=[a@0 ASC], preserve_partitioning=[true] -03)----CoalesceBatchesExec: target_batch_size=8192 -04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 -05)--------MemoryExec: partitions=1, partition_sizes=[1] -06)--SortExec: expr=[a@0 ASC], preserve_partitioning=[true] -07)----CoalesceBatchesExec: target_batch_size=8192 -08)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 -09)--------MemoryExec: partitions=1, partition_sizes=[1] +02)--SortExec: expr=[a@0 ASC], preserve_partitioning=[false] +03)----MemoryExec: partitions=1, partition_sizes=[1] +04)--SortExec: expr=[a@0 ASC], preserve_partitioning=[false] +05)----MemoryExec: partitions=1, partition_sizes=[1] # inner join with join filter query TITI rowsort diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index aedbee35400c2..476ebe7ebebe1 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -563,15 +563,12 @@ logical_plan physical_plan 01)UnionExec 02)--ProjectionExec: expr=[Int64(1)@0 as a] -03)----AggregateExec: mode=FinalPartitioned, gby=[Int64(1)@0 as Int64(1)], aggr=[], ordering_mode=Sorted -04)------CoalesceBatchesExec: target_batch_size=2 -05)--------RepartitionExec: partitioning=Hash([Int64(1)@0], 4), input_partitions=1 -06)----------AggregateExec: mode=Partial, gby=[1 as Int64(1)], aggr=[], ordering_mode=Sorted -07)------------PlaceholderRowExec -08)--ProjectionExec: expr=[2 as a] -09)----PlaceholderRowExec -10)--ProjectionExec: expr=[3 as a] -11)----PlaceholderRowExec +03)----AggregateExec: mode=SinglePartitioned, gby=[1 as Int64(1)], aggr=[], ordering_mode=Sorted +04)------PlaceholderRowExec +05)--ProjectionExec: expr=[2 as a] +06)----PlaceholderRowExec +07)--ProjectionExec: expr=[3 as a] +08)----PlaceholderRowExec # test UNION ALL aliases correctly with aliased subquery query TT @@ -594,15 +591,12 @@ logical_plan physical_plan 01)UnionExec 02)--ProjectionExec: expr=[count(*)@1 as count, n@0 as n] -03)----AggregateExec: mode=FinalPartitioned, gby=[n@0 as n], aggr=[count(*)], ordering_mode=Sorted -04)------CoalesceBatchesExec: target_batch_size=2 -05)--------RepartitionExec: partitioning=Hash([n@0], 4), input_partitions=1 -06)----------AggregateExec: mode=Partial, gby=[n@0 as n], aggr=[count(*)], ordering_mode=Sorted 
-07)------------ProjectionExec: expr=[5 as n] -08)--------------PlaceholderRowExec -09)--ProjectionExec: expr=[1 as count, max(Int64(10))@0 as n] -10)----AggregateExec: mode=Single, gby=[], aggr=[max(Int64(10))] -11)------PlaceholderRowExec +03)----AggregateExec: mode=SinglePartitioned, gby=[n@0 as n], aggr=[count(*)], ordering_mode=Sorted +04)------ProjectionExec: expr=[5 as n] +05)--------PlaceholderRowExec +06)--ProjectionExec: expr=[1 as count, max(Int64(10))@0 as n] +07)----AggregateExec: mode=Single, gby=[], aggr=[max(Int64(10))] +08)------PlaceholderRowExec # Test issue: https://github.com/apache/datafusion/issues/11409 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 4f4b9749c561e..dfc8826676174 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1777,17 +1777,17 @@ physical_plan 02)--AggregateExec: mode=Final, gby=[], aggr=[count(*)] 03)----CoalescePartitionsExec 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(*)] -05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 -06)----------ProjectionExec: expr=[] -07)------------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[] -08)--------------CoalesceBatchesExec: target_batch_size=4096 -09)----------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 -10)------------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[] -11)--------------------ProjectionExec: expr=[c1@0 as c1] -12)----------------------CoalesceBatchesExec: target_batch_size=4096 -13)------------------------FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434 -14)--------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c13], has_header=true +05)--------ProjectionExec: expr=[] +06)----------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[] +07)------------CoalesceBatchesExec: target_batch_size=4096 +08)--------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 +09)----------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[] +10)------------------ProjectionExec: expr=[c1@0 as c1] +11)--------------------CoalesceBatchesExec: target_batch_size=4096 +12)----------------------FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434 +13)------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +14)--------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c13], has_header=true + query I SELECT count(*) as global_count FROM diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e0c8391a259a7..6f315f539b118 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -91,6 +91,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.keep_partition_by_columns | false | Should DataFusion keep the columns used for partition_by in the output RecordBatches | | datafusion.execution.skip_partial_aggregation_probe_ratio_threshold | 0.8 | Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. 
If the value is greater then partial aggregation will skip aggregation for further input | | datafusion.execution.skip_partial_aggregation_probe_rows_threshold | 100000 | Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode | +| datafusion.execution.use_row_number_estimates_to_optimize_partitioning | false | Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future. | | datafusion.optimizer.enable_distinct_aggregation_soft_limit | true | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. | | datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores | | datafusion.optimizer.enable_topk_aggregation | true | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible | From 27304239ef79b50a443320791755bf74eed4a85d Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Sat, 10 Aug 2024 14:21:56 +0100 Subject: [PATCH 263/357] fix: make ScalarValue::Dictionary with NULL values produce NULL arrays (#11908) Update the way ScalarValue::Dictionary values are turned into arrays such that: scalar_value.is_null() == scalar_value.to_array()?.is_null(0) Previously the dictionary would be created with a valid key entry pointing to a NULL value. https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout suggests that this does not constitute a NULL entry. 
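
A short illustration of this invariant, modeled on the unit test the patch adds below and
assuming the `arrow` and `datafusion-common` crates are available as dependencies:

    use arrow::array::Array;
    use arrow::datatypes::DataType;
    use datafusion_common::{Result, ScalarValue};

    fn main() -> Result<()> {
        // A dictionary scalar wrapping a NULL value is itself NULL ...
        let dictionary_scalar = ScalarValue::Dictionary(
            Box::new(DataType::Int32),
            Box::new(ScalarValue::Null),
        );
        assert!(dictionary_scalar.is_null());

        // ... and, with this change, converting it to an array yields a NULL
        // entry rather than a valid key pointing at a NULL dictionary value.
        let dictionary_array = dictionary_scalar.to_array()?;
        assert!(dictionary_array.is_null(0));
        Ok(())
    }
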
--- datafusion/common/src/scalar/mod.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 286df339adcf4..fd0c11ed0ab01 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -801,9 +801,13 @@ fn dict_from_scalar( let values_array = value.to_array_of_size(1)?; // Create a key array with `size` elements, each of 0 - let key_array: PrimitiveArray = std::iter::repeat(Some(K::default_value())) - .take(size) - .collect(); + let key_array: PrimitiveArray = std::iter::repeat(if value.is_null() { + None + } else { + Some(K::default_value()) + }) + .take(size) + .collect(); // create a new DictionaryArray // @@ -6674,4 +6678,15 @@ mod tests { ); assert!(dense_scalar.is_null()); } + + #[test] + fn null_dictionary_scalar_produces_null_dictionary_array() { + let dictionary_scalar = ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::Null), + ); + assert!(dictionary_scalar.is_null()); + let dictionary_array = dictionary_scalar.to_array().unwrap(); + assert!(dictionary_array.is_null(0)); + } } From ee6910b587cc6bdad78dfbec7e3a6431a5e6699d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 11 Aug 2024 07:10:10 -0400 Subject: [PATCH 264/357] Minor: change wording for PMC membership notice (#11930) --- docs/source/contributor-guide/inviting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/inviting.md b/docs/source/contributor-guide/inviting.md index 4066dd9699eeb..4e7ffeb7518d3 100644 --- a/docs/source/contributor-guide/inviting.md +++ b/docs/source/contributor-guide/inviting.md @@ -341,7 +341,7 @@ The DataFusion PMC Chair then sends a NOTICE to `board@apache.org` (cc'ing ``` To: board@apache.org Cc: private@datafusion.apache.org -Subject: [NOTICE] $NEW_PMC_MEMBER for DataFusion PMC +Subject: [NOTICE] $NEW_PMC_MEMBER to join DataFusion PMC DataFusion proposes to invite $NEW_PMC_MEMBER ($NEW_PMC_MEMBER_APACHE_ID) to join the PMC. 
From 64a928057dc1197acf05c6b08b049db19897918c Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Sun, 11 Aug 2024 20:06:14 +0800 Subject: [PATCH 265/357] fix approx_percentile_cont() bug (#11934) --- .../functions-aggregate-common/src/tdigest.rs | 14 ++++++- .../sqllogictest/test_files/aggregate.slt | 40 ++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 070ebc46483b6..620a68e83ecdc 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -233,7 +233,7 @@ impl TDigest { } fn clamp(v: f64, lo: f64, hi: f64) -> f64 { - if lo.is_nan() && hi.is_nan() { + if lo.is_nan() || hi.is_nan() { return v; } v.clamp(lo, hi) @@ -539,6 +539,18 @@ impl TDigest { let value = self.centroids[pos].mean() + ((rank - t) / self.centroids[pos].weight() - 0.5) * delta; + // In `merge_digests()`: `min` is initialized to Inf, `max` is initialized to -Inf + // and gets updated according to different `TDigest`s + // However, `min`/`max` won't get updated if there is only one `NaN` within `TDigest` + // The following two checks is for such edge case + if !min.is_finite() && min.is_sign_positive() { + min = f64::NEG_INFINITY; + } + + if !max.is_finite() && max.is_sign_negative() { + max = f64::INFINITY; + } + Self::clamp(value, min, max) } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index c68a6c345caa9..322ddcdb047b3 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1249,6 +1249,44 @@ SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (CAST(NULL as INT))) as t (v) ---- NULL +# +# percentile_cont edge cases +# + +statement ok +CREATE TABLE tmp_percentile_cont(v1 INT, v2 DOUBLE); + +statement ok +INSERT INTO tmp_percentile_cont VALUES (1, 'NaN'::Double), (2, 'NaN'::Double), (3, 'NaN'::Double); + +# ISSUE: https://github.com/apache/datafusion/issues/11871 +# Note `approx_median()` is using the same implementation as `approx_percentile_cont()` +query R +select APPROX_MEDIAN(v2) from tmp_percentile_cont WHERE v1 = 1; +---- +NaN + +# ISSUE: https://github.com/apache/datafusion/issues/11870 +query R +select APPROX_PERCENTILE_CONT(v2, 0.8) from tmp_percentile_cont; +---- +NaN + +# ISSUE: https://github.com/apache/datafusion/issues/11869 +# Note: `approx_percentile_cont_with_weight()` uses the same implementation as `approx_percentile_cont()` +query R +SELECT APPROX_PERCENTILE_CONT_WITH_WEIGHT( + v2, + '+Inf'::Double, + 0.9 +) +FROM tmp_percentile_cont; +---- +NaN + +statement ok +DROP TABLE tmp_percentile_cont; + # csv_query_cube_avg query TIR SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2 @@ -5553,4 +5591,4 @@ drop table employee_csv; query I??III?T select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(NULL), nth_value(NULL, 1), string_agg(NULL, ','); ---- -0 NULL NULL NULL NULL NULL NULL NULL \ No newline at end of file +0 NULL NULL NULL NULL NULL NULL NULL From 7d6cd13d55d7b63c74a2cb058a55293c2dcd9797 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Sun, 11 Aug 2024 07:11:45 -0700 Subject: [PATCH 266/357] fix: throw error on sub-day generate_series increments (#11907) * fix: throw error on sub-day generate_series increments * refactor: avoid `loop` * Add a few more 
tests * Update datafusion/functions-nested/src/range.rs Co-authored-by: Andrew Lamb * refactor: tweak from feedback * fix: fix dup rows --------- Co-authored-by: Andrew Lamb --- datafusion/functions-nested/src/range.rs | 76 ++++++++++++-------- datafusion/sqllogictest/test_files/array.slt | 18 ++++- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 269eaa5602305..5b7315719631e 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -18,13 +18,11 @@ //! [`ScalarUDFImpl`] definitions for range and gen_series functions. use crate::utils::make_scalar_function; -use arrow::array::{Array, ArrayRef, Int64Array, ListArray}; +use arrow::array::{Array, ArrayRef, Date32Builder, Int64Array, ListArray, ListBuilder}; use arrow::datatypes::{DataType, Field}; use arrow_array::types::{Date32Type, IntervalMonthDayNanoType}; -use arrow_array::{Date32Array, NullArray}; -use arrow_buffer::{ - BooleanBufferBuilder, IntervalMonthDayNano, NullBuffer, OffsetBuffer, -}; +use arrow_array::NullArray; +use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::DataType::{Date32, Int64, Interval, List}; use arrow_schema::IntervalUnit::MonthDayNano; use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array}; @@ -33,6 +31,7 @@ use datafusion_expr::{ ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; use std::any::Any; +use std::iter::from_fn; use std::sync::Arc; make_udf_expr_and_func!( @@ -166,8 +165,11 @@ impl ScalarUDFImpl for GenSeries { match args[0].data_type() { Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args), Date32 => make_scalar_function(|args| gen_range_date(args, true))(args), - _ => { - exec_err!("unsupported type for range") + dt => { + exec_err!( + "unsupported type for range. 
Expected Int64 or Date32, got: {}", + dt + ) } } } @@ -311,39 +313,53 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result { Some(as_interval_mdn_array(&args[2])?), ); - let mut values = vec![]; - let mut offsets = vec![0]; + // values are date32s + let values_builder = Date32Builder::new(); + let mut list_builder = ListBuilder::new(values_builder); + for (idx, stop) in stop_array.iter().enumerate() { let mut stop = stop.unwrap_or(0); - let start = start_array.as_ref().map(|x| x.value(idx)).unwrap_or(0); - let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or( - IntervalMonthDayNano { - months: 0, - days: 0, - nanoseconds: 1, - }, - ); + + let start = if let Some(start_array_values) = start_array { + start_array_values.value(idx) + } else { + list_builder.append_null(); + continue; + }; + + let step = if let Some(step) = step_array { + step.value(idx) + } else { + list_builder.append_null(); + continue; + }; + let (months, days, _) = IntervalMonthDayNanoType::to_parts(step); + + if months == 0 && days == 0 { + return exec_err!("Cannot generate date range less than 1 day."); + } + let neg = months < 0 || days < 0; if !include_upper { stop = Date32Type::subtract_month_day_nano(stop, step); } let mut new_date = start; - loop { - if neg && new_date < stop || !neg && new_date > stop { - break; + + let values = from_fn(|| { + if (neg && new_date < stop) || (!neg && new_date > stop) { + None + } else { + let current_date = new_date; + new_date = Date32Type::add_month_day_nano(new_date, step); + Some(Some(current_date)) } - values.push(new_date); - new_date = Date32Type::add_month_day_nano(new_date, step); - } - offsets.push(values.len() as i32); + }); + + list_builder.append_value(values); } - let arr = Arc::new(ListArray::try_new( - Arc::new(Field::new("item", Date32, true)), - OffsetBuffer::new(offsets.into()), - Arc::new(Date32Array::from(values)), - None, - )?); + let arr = Arc::new(list_builder.finish()); + Ok(arr) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 9e34db8f8dc24..b97ecced57e35 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1971,7 +1971,7 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), co # `from` may be larger than `to` and `stride` is positive query ???? select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), - array_slice(a, 3, 2, 1), array_slice(a, 3, 2) + array_slice(a, 3, 2, 1), array_slice(a, 3, 2) from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a); ---- [] [] [] [] @@ -5711,7 +5711,7 @@ select # Test range for other edge cases query ???????? -select +select range(9223372036854775807, 9223372036854775807, -1) as c1, range(9223372036854775807, 9223372036854775806, -1) as c2, range(9223372036854775807, 9223372036854775807, 1) as c3, @@ -5787,6 +5787,9 @@ select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ---- [] +query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. +select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) + query ????????? 
select generate_series(5), generate_series(2, 5), @@ -5801,6 +5804,9 @@ select generate_series(5), ---- [0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] +query error DataFusion error: Execution error: unsupported type for range. Expected Int64 or Date32, got: Timestamp\(Nanosecond, None\) +select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp, INTERVAL '1' HOUR); + ## should return NULL query ? select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); @@ -5832,6 +5838,12 @@ select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ---- [] +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) + # Test generate_series with zero step query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) select generate_series(1, 1, 0); @@ -5849,7 +5861,7 @@ select # Test generate_series for other edge cases query ???? -select +select generate_series(9223372036854775807, 9223372036854775807, -1) as c1, generate_series(9223372036854775807, 9223372036854775807, 1) as c2, generate_series(-9223372036854775808, -9223372036854775808, -1) as c3, From fd237f8705b18fa089fdfb8dd5b04655ccb4d691 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Sun, 11 Aug 2024 22:39:14 +0800 Subject: [PATCH 267/357] Ingore shebang at top of file (#11927) Signed-off-by: Chojan Shang --- datafusion-cli/src/exec.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 178bce6f2fe65..db4242d971758 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -70,6 +70,9 @@ pub async fn exec_from_lines( for line in reader.lines() { match line { + Ok(line) if line.starts_with("#!") => { + continue; + } Ok(line) if line.starts_with("--") => { continue; } From 63ca7142fb8bda027e05eb0e89958ab3b9d49873 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Mon, 12 Aug 2024 16:49:08 +0800 Subject: [PATCH 268/357] Parse Sqllogictest column types from physical schema (#11929) * Parse Sqllogictest column types from physical schema * Use execute_stream --- .../src/engines/datafusion_engine/runner.rs | 10 +++- .../sqllogictest/test_files/coalesce.slt | 12 ++--- datafusion/sqllogictest/test_files/copy.slt | 48 +++++++++---------- .../sqllogictest/test_files/csv_files.slt | 10 ++-- datafusion/sqllogictest/test_files/expr.slt | 30 ++++++------ .../sqllogictest/test_files/functions.slt | 34 ++++++------- .../sqllogictest/test_files/group_by.slt | 8 ++-- datafusion/sqllogictest/test_files/limit.slt | 2 +- datafusion/sqllogictest/test_files/nvl.slt | 2 +- .../sqllogictest/test_files/parquet.slt | 8 ++-- .../test_files/parquet_sorted_statistics.slt 
| 6 +-- datafusion/sqllogictest/test_files/regexp.slt | 6 +-- datafusion/sqllogictest/test_files/scalar.slt | 8 ++-- datafusion/sqllogictest/test_files/unnest.slt | 2 +- 14 files changed, 96 insertions(+), 90 deletions(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs index afd0a241ca5ef..5c24b49cfe868 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs @@ -15,10 +15,13 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; use std::{path::PathBuf, time::Duration}; use arrow::record_batch::RecordBatch; use async_trait::async_trait; +use datafusion::physical_plan::common::collect; +use datafusion::physical_plan::execute_stream; use datafusion::prelude::SessionContext; use log::info; use sqllogictest::DBOutput; @@ -69,9 +72,12 @@ impl sqllogictest::AsyncDB for DataFusion { async fn run_query(ctx: &SessionContext, sql: impl Into) -> Result { let df = ctx.sql(sql.into().as_str()).await?; + let task_ctx = Arc::new(df.task_ctx()); + let plan = df.create_physical_plan().await?; - let types = normalize::convert_schema_to_types(df.schema().fields()); - let results: Vec = df.collect().await?; + let stream = execute_stream(plan, task_ctx)?; + let types = normalize::convert_schema_to_types(stream.schema().fields()); + let results: Vec = collect(stream).await?; let rows = normalize::convert_batches(results)?; if rows.is_empty() && types.is_empty() { diff --git a/datafusion/sqllogictest/test_files/coalesce.slt b/datafusion/sqllogictest/test_files/coalesce.slt index d16b79734c62c..0e977666ccfdf 100644 --- a/datafusion/sqllogictest/test_files/coalesce.slt +++ b/datafusion/sqllogictest/test_files/coalesce.slt @@ -23,7 +23,7 @@ select coalesce(1, 2, 3); 1 # test with first null -query ?T +query IT select coalesce(null, 3, 2, 1), arrow_typeof(coalesce(null, 3, 2, 1)); ---- 3 Int64 @@ -35,7 +35,7 @@ select coalesce(null, null); NULL # cast to float -query IT +query RT select coalesce(1, 2.0), arrow_typeof(coalesce(1, 2.0)) @@ -51,7 +51,7 @@ select ---- 2 Float64 -query IT +query RT select coalesce(1, arrow_cast(2.0, 'Float32')), arrow_typeof(coalesce(1, arrow_cast(2.0, 'Float32'))) @@ -177,7 +177,7 @@ select 2 Decimal256(22, 2) # coalesce string -query T? +query TT select coalesce('', 'test'), coalesce(null, 'test'); @@ -246,7 +246,7 @@ drop table test1 statement ok create table t(c varchar) as values ('a'), (null); -query TT +query ?T select coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')), arrow_typeof(coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)'))) @@ -295,7 +295,7 @@ statement ok drop table t; # test dict(int32, int8) -query I +query ? 
select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)')); ---- 34 diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index ff7040926caa8..ebb3ca2173b83 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -20,13 +20,13 @@ statement ok create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2, 'Bar'); # Copy to directory as multiple files -query IT +query I COPY source_table TO 'test_files/scratch/copy/table/' STORED AS parquet OPTIONS ('format.compression' 'zstd(10)'); ---- 2 # Copy to directory as partitioned files -query IT +query I COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' STORED AS parquet PARTITIONED BY (col2) OPTIONS ('format.compression' 'zstd(10)'); ---- 2 @@ -53,7 +53,7 @@ select * from validate_partitioned_parquet_bar order by col1; 2 # Copy to directory as partitioned files -query ITT +query I COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/' STORED AS parquet PARTITIONED BY (column2, column3) OPTIONS ('format.compression' 'zstd(10)'); ---- @@ -81,7 +81,7 @@ select * from validate_partitioned_parquet_a_x order by column1; 1 # Copy to directory as partitioned files -query TTT +query I COPY (values ('1', 'a', 'x'), ('2', 'b', 'y'), ('3', 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table3/' STORED AS parquet PARTITIONED BY (column1, column3) OPTIONS ('format.compression' 'zstd(10)'); ---- @@ -167,7 +167,7 @@ physical_plan 02)--MemoryExec: partitions=1, partition_sizes=[1] # Copy to directory as partitioned files with keep_partition_by_columns enabled -query TT +query I COPY (values ('1', 'a'), ('2', 'b'), ('3', 'c')) TO 'test_files/scratch/copy/partitioned_table4/' STORED AS parquet PARTITIONED BY (column1) OPTIONS (execution.keep_partition_by_columns true); ---- @@ -184,7 +184,7 @@ select column1, column2 from validate_partitioned_parquet4 order by column1,colu 1 a # Copy more files to directory via query -query IT +query I COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/copy/table/' STORED AS PARQUET; ---- 4 @@ -203,7 +203,7 @@ select * from validate_parquet; 1 Foo 2 Bar -query ? +query I copy (values (struct(timestamp '2021-01-01 01:00:01', 1)), (struct(timestamp '2022-01-01 01:00:01', 2)), (struct(timestamp '2023-01-03 01:00:01', 3)), (struct(timestamp '2024-01-01 01:00:01', 4))) to 'test_files/scratch/copy/table_nested2/' STORED AS PARQUET; @@ -221,7 +221,7 @@ select * from validate_parquet_nested2; {c0: 2023-01-03T01:00:01, c1: 3} {c0: 2024-01-01T01:00:01, c1: 4} -query ?? +query I COPY (values (struct ('foo', (struct ('foo', make_array(struct('a',1), struct('b',2))))), make_array(timestamp '2023-01-01 01:00:01',timestamp '2023-01-01 01:00:01')), (struct('bar', (struct ('foo', make_array(struct('aa',10), struct('bb',20))))), make_array(timestamp '2024-01-01 01:00:01', timestamp '2024-01-01 01:00:01'))) @@ -239,7 +239,7 @@ select * from validate_parquet_nested; {c0: foo, c1: {c0: foo, c1: [{c0: a, c1: 1}, {c0: b, c1: 2}]}} [2023-01-01T01:00:01, 2023-01-01T01:00:01] {c0: bar, c1: {c0: foo, c1: [{c0: aa, c1: 10}, {c0: bb, c1: 20}]}} [2024-01-01T01:00:01, 2024-01-01T01:00:01] -query ? 
+query I copy (values ([struct('foo', 1), struct('bar', 2)])) to 'test_files/scratch/copy/array_of_struct/' STORED AS PARQUET; @@ -255,7 +255,7 @@ select * from validate_array_of_struct; ---- [{c0: foo, c1: 1}, {c0: bar, c1: 2}] -query ? +query I copy (values (struct('foo', [1,2,3], struct('bar', [2,3,4])))) to 'test_files/scratch/copy/struct_with_array/' STORED AS PARQUET; ---- @@ -272,7 +272,7 @@ select * from validate_struct_with_array; # Copy parquet with all supported statement overrides -query IT +query I COPY source_table TO 'test_files/scratch/copy/table_with_options/' STORED AS PARQUET @@ -378,7 +378,7 @@ select * from validate_parquet_with_options; 2 Bar # Copy from table to single file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.parquet'; ---- 2 @@ -394,7 +394,7 @@ select * from validate_parquet_single; 2 Bar # copy from table to folder of compressed json files -query IT +query I COPY source_table to 'test_files/scratch/copy/table_json_gz' STORED AS JSON OPTIONS ('format.compression' gzip); ---- 2 @@ -410,7 +410,7 @@ select * from validate_json_gz; 2 Bar # copy from table to folder of compressed csv files -query IT +query I COPY source_table to 'test_files/scratch/copy/table_csv' STORED AS CSV OPTIONS ('format.has_header' false, 'format.compression' gzip); ---- 2 @@ -426,7 +426,7 @@ select * from validate_csv; 2 Bar # Copy from table to single csv -query IT +query I COPY source_table to 'test_files/scratch/copy/table.csv'; ---- 2 @@ -442,7 +442,7 @@ select * from validate_single_csv; 2 Bar # Copy from table to folder of json -query IT +query I COPY source_table to 'test_files/scratch/copy/table_json' STORED AS JSON; ---- 2 @@ -458,7 +458,7 @@ select * from validate_json; 2 Bar # Copy from table to single json file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.json' STORED AS JSON ; ---- 2 @@ -474,7 +474,7 @@ select * from validate_single_json; 2 Bar # COPY csv files with all options set -query IT +query I COPY source_table to 'test_files/scratch/copy/table_csv_with_options' STORED AS CSV OPTIONS ( @@ -499,7 +499,7 @@ select * from validate_csv_with_options; 2;Bar # Copy from table to single arrow file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.arrow' STORED AS ARROW; ---- 2 @@ -517,7 +517,7 @@ select * from validate_arrow_file; 2 Bar # Copy from dict encoded values to single arrow file -query T? +query I COPY (values ('c', arrow_cast('foo', 'Dictionary(Int32, Utf8)')), ('d', arrow_cast('bar', 'Dictionary(Int32, Utf8)'))) to 'test_files/scratch/copy/table_dict.arrow' STORED AS ARROW; @@ -538,7 +538,7 @@ d bar # Copy from table to folder of json -query IT +query I COPY source_table to 'test_files/scratch/copy/table_arrow' STORED AS ARROW; ---- 2 @@ -556,7 +556,7 @@ select * from validate_arrow; # Format Options Support without the 'format.' prefix # Copy with format options for Parquet without the 'format.' prefix -query IT +query I COPY source_table TO 'test_files/scratch/copy/format_table.parquet' OPTIONS ( compression snappy, @@ -566,14 +566,14 @@ OPTIONS ( 2 # Copy with format options for JSON without the 'format.' prefix -query IT +query I COPY source_table to 'test_files/scratch/copy/format_table' STORED AS JSON OPTIONS (compression gzip); ---- 2 # Copy with format options for CSV without the 'format.' 
prefix -query IT +query I COPY source_table to 'test_files/scratch/copy/format_table.csv' OPTIONS ( has_header false, diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index f7f5aa54dd0d5..3fb9a6f20c24a 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -115,14 +115,14 @@ CREATE TABLE src_table_2 ( (7, 'ggg', 700, 2), (8, 'hhh', 800, 2); -query ITII +query I COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv' STORED AS CSV; ---- 4 -query ITII +query I COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv' STORED AS CSV; ---- @@ -175,7 +175,7 @@ CREATE TABLE table_with_necessary_quoting ( (4, 'h|h|h'); # quote is required because `|` is delimiter and part of the data -query IT +query I COPY table_with_necessary_quoting TO 'test_files/scratch/csv_files/table_with_necessary_quoting.csv' STORED AS csv OPTIONS ('format.quote' '~', @@ -247,7 +247,7 @@ id2 "value2" id3 "value3" # ensure that double quote option is used when writing to csv -query TT +query I COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_double_quotes.csv' STORED AS csv OPTIONS ('format.double_quote' 'true'); @@ -271,7 +271,7 @@ id2 "value2" id3 "value3" # ensure when double quote option is disabled that quotes are escaped instead -query TT +query I COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_escaped_quotes.csv' STORED AS csv OPTIONS ('format.double_quote' 'false', 'format.escape' '#'); diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 1d5f9ba23d580..3c3b0631e3ff7 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -22,7 +22,7 @@ SELECT true, false, false = false, true = false true false true false # test_mathematical_expressions_with_null -query RRRRRRRRRRRRRRRRRR?RRRRRRRIRRRRRRBB +query RRRRRRRRRRRRRRRRRR?RRRRRIIIRRRRRRBB SELECT sqrt(NULL), cbrt(NULL), @@ -365,7 +365,7 @@ SELECT bit_length('josé') ---- 40 -query ? +query I SELECT bit_length(NULL) ---- NULL @@ -395,7 +395,7 @@ SELECT btrim('\nxyxtrimyyx\n', 'xyz\n') ---- trim -query ? +query T SELECT btrim(NULL, 'xyz') ---- NULL @@ -476,7 +476,7 @@ SELECT initcap('hi THOMAS') ---- Hi Thomas -query ? +query T SELECT initcap(NULL) ---- NULL @@ -491,7 +491,7 @@ SELECT lower('TOM') ---- tom -query ? +query T SELECT lower(NULL) ---- NULL @@ -511,7 +511,7 @@ SELECT ltrim('zzzytest', 'xyz') ---- test -query ? +query T SELECT ltrim(NULL, 'xyz') ---- NULL @@ -531,7 +531,7 @@ SELECT octet_length('josé') ---- 5 -query ? +query I SELECT octet_length(NULL) ---- NULL @@ -551,7 +551,7 @@ SELECT repeat('Pg', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT repeat(NULL, 4) ---- NULL @@ -576,7 +576,7 @@ SELECT replace('abcdefabcdef', NULL, 'XX') ---- NULL -query ? +query T SELECT replace(NULL, 'cd', 'XX') ---- NULL @@ -596,7 +596,7 @@ SELECT rtrim('testxxzx', 'xyz') ---- test -query ? +query T SELECT rtrim(NULL, 'xyz') ---- NULL @@ -611,7 +611,7 @@ SELECT split_part('abc~@~def~@~ghi', '~@~', 20) ---- (empty) -query ? +query T SELECT split_part(NULL, '~@~', 20) ---- NULL @@ -788,7 +788,7 @@ SELECT upper('tom') ---- TOM -query ? +query T SELECT upper(NULL) ---- NULL @@ -1774,7 +1774,7 @@ SELECT arrow_cast(decode(arrow_cast('746f6d', 'LargeBinary'),'hex'), 'Utf8'); ---- tom -query ? 
+query T SELECT encode(NULL,'base64'); ---- NULL @@ -1784,7 +1784,7 @@ SELECT decode(NULL,'base64'); ---- NULL -query ? +query T SELECT encode(NULL,'hex'); ---- NULL @@ -1829,7 +1829,7 @@ SELECT md5(''); ---- d41d8cd98f00b204e9800998ecf8427e -query ? +query T SELECT md5(NULL); ---- NULL diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index c3dd791f6ca8a..3255ddccdb81f 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -33,7 +33,7 @@ SELECT char_length('josé') ---- 4 -query ? +query I SELECT char_length(NULL) ---- NULL @@ -53,7 +53,7 @@ SELECT character_length('josé') ---- 4 -query ? +query I SELECT character_length(NULL) ---- NULL @@ -93,12 +93,12 @@ SELECT left('abcde', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT left(NULL, 2) ---- NULL -query ? +query T SELECT left(NULL, CAST(NULL AS INT)) ---- NULL @@ -128,7 +128,7 @@ SELECT length(arrow_cast('josé', 'Dictionary(Int32, Utf8)')) ---- 4 -query ? +query I SELECT length(NULL) ---- NULL @@ -193,12 +193,12 @@ SELECT lpad('xyxhi', 3) ---- xyx -query ? +query T SELECT lpad(NULL, 0) ---- NULL -query ? +query T SELECT lpad(NULL, 5, 'xy') ---- NULL @@ -218,7 +218,7 @@ SELECT reverse('loẅks') ---- sk̈wol -query ? +query T SELECT reverse(NULL) ---- NULL @@ -258,12 +258,12 @@ SELECT right('abcde', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT right(NULL, 2) ---- NULL -query ? +query T SELECT right(NULL, CAST(NULL AS INT)) ---- NULL @@ -348,7 +348,7 @@ SELECT strpos('joséésoj', 'abc') ---- 0 -query ? +query I SELECT strpos(NULL, 'abc') ---- NULL @@ -429,7 +429,7 @@ SELECT translate(arrow_cast('12345', 'Dictionary(Int32, Utf8)'), '143', 'ax') ---- a2x5 -query ? +query T SELECT translate(NULL, '143', 'ax') ---- NULL @@ -923,12 +923,12 @@ SELECT levenshtein('kitten', NULL) ---- NULL -query ? +query I SELECT levenshtein(NULL, 'sitting') ---- NULL -query ? +query I SELECT levenshtein(NULL, NULL) ---- NULL @@ -1015,7 +1015,7 @@ arrow 1 arrow arrow 2 arrow # Test substring_index with NULL values -query ?TT? +query TTTT SELECT substring_index(NULL, '.', 1), substring_index('arrow.apache.org', NULL, 1), @@ -1094,7 +1094,7 @@ SELECT find_in_set('', '') ---- 1 -query ? +query I SELECT find_in_set(NULL, 'a,b,c,d') ---- NULL @@ -1105,7 +1105,7 @@ SELECT find_in_set('a', NULL) NULL -query ? 
+query I SELECT find_in_set(NULL, NULL) ---- NULL diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index a4a886c75a776..5571315e2accd 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4502,28 +4502,28 @@ CREATE TABLE src_table ( ('2020-12-19T00:00:00.00Z', 9); # Use src_table to create a partitioned file -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/0.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/1.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/2.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/3.csv' STORED AS CSV; diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 4cdd40ac8c34c..439df7fede511 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -518,7 +518,7 @@ drop table aggregate_test_100; ## Test limit pushdown in StreamingTableExec ## Create sorted table with 5 rows -query IT +query I COPY (select * from (values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e') )) TO 'test_files/scratch/limit/data.csv' STORED AS CSV; diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt index c77214cc302a1..81e79e1eb5b06 100644 --- a/datafusion/sqllogictest/test_files/nvl.slt +++ b/datafusion/sqllogictest/test_files/nvl.slt @@ -114,7 +114,7 @@ SELECT NVL(1, 3); ---- 1 -query ? +query I SELECT NVL(NULL, NULL); ---- NULL diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 3342f85c81417..a7ca0384d3f43 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -42,7 +42,7 @@ CREATE TABLE src_table ( # Setup 2 files, i.e., as many as there are partitions: # File 1: -query ITID +query I COPY (SELECT * FROM src_table LIMIT 3) TO 'test_files/scratch/parquet/test_table/0.parquet' STORED AS PARQUET; @@ -50,7 +50,7 @@ STORED AS PARQUET; 3 # File 2: -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 3 LIMIT 3) TO 'test_files/scratch/parquet/test_table/1.parquet' STORED AS PARQUET; @@ -123,7 +123,7 @@ physical_plan 02)--ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col], output_ordering=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] # Add another file to the directory underlying test_table -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/2.parquet' STORED AS PARQUET; @@ -275,7 +275,7 @@ SELECT COUNT(*) FROM timestamp_with_tz; # Test config listing_table_ignore_subdirectory: -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/subdir/3.parquet' STORED AS PARQUET; diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index f7a81f08456f3..b68d4f52d21c8 100644 --- 
a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -56,7 +56,7 @@ CREATE TABLE src_table ( # Setup 3 files, in particular more files than there are partitions # File 1: -query IITIDII +query I COPY (SELECT * FROM src_table ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet' STORED AS PARQUET; @@ -64,7 +64,7 @@ STORED AS PARQUET; 3 # File 2: -query IITIDII +query I COPY (SELECT * FROM src_table WHERE int_col > 3 ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet' STORED AS PARQUET; @@ -72,7 +72,7 @@ STORED AS PARQUET; 3 # Add another file to the directory underlying test_table -query IITIDII +query I COPY (SELECT * FROM src_table WHERE int_col > 6 ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet' STORED AS PARQUET; diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index aa99a54c26eea..149ad7f6fdcd2 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -94,7 +94,7 @@ SELECT regexp_like('aa', '.*-(\d)'); ---- false -query ? +query B SELECT regexp_like(NULL, '.*-(\d)'); ---- NULL @@ -104,7 +104,7 @@ SELECT regexp_like('aaa-0', NULL); ---- NULL -query ? +query B SELECT regexp_like(null, '.*-(\d)'); ---- NULL @@ -294,7 +294,7 @@ SELECT regexp_replace('Thomas', '.[mN]a.', 'M'); ---- ThM -query ? +query T SELECT regexp_replace(NULL, 'b(..)', 'X\\1Y', 'g'); ---- NULL diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 188a2c5863e6f..6eed72e914bd0 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -706,7 +706,7 @@ select power(2, 0), power(2, 1), power(2, 2); 1 2 4 # power scalar nulls -query R rowsort +query I rowsort select power(null, 64); ---- NULL @@ -718,7 +718,7 @@ select power(2, null); NULL # power scalar nulls #2 -query R rowsort +query I rowsort select power(null, null); ---- NULL @@ -1720,7 +1720,7 @@ CREATE TABLE test( (-14, -14, -14.5, -14.5), (NULL, NULL, NULL, NULL); -query RRRRIR rowsort +query IRRRIR rowsort SELECT power(i32, exp_i) as power_i32, power(i64, exp_f) as power_i64, pow(f32, exp_i) as power_f32, @@ -1895,7 +1895,7 @@ select 100000 where position('legend' in 'league of legend') = 11; 100000 # test null -query ? 
+query I select position(null in null) ---- NULL diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 4957011b8ba23..afa576d127468 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -235,7 +235,7 @@ NULL 10 NULL NULL NULL 17 NULL NULL 18 -query IIII +query IIIT select unnest(column1), unnest(column2) + 2, column3 * 10, unnest(array_remove(column1, '4')) From f2685d3a01e5f5a3f87b5186512d64078a0b89ce Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Mon, 12 Aug 2024 18:31:28 +0800 Subject: [PATCH 269/357] Update INITCAP scalar function to support Utf8View (#11888) * Update INITCAP scalar function to support Utf8View * Fix comments * Fix comments --- datafusion/functions/src/string/initcap.rs | 92 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 56 ++++++++--- 2 files changed, 116 insertions(+), 32 deletions(-) diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/string/initcap.rs index 864179d130fdc..4e1eb213ef57d 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/string/initcap.rs @@ -18,10 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; @@ -45,7 +45,7 @@ impl InitcapFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } @@ -73,6 +73,7 @@ impl ScalarUDFImpl for InitcapFunc { match args[0].data_type() { DataType::Utf8 => make_scalar_function(initcap::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(initcap::, vec![])(args), + DataType::Utf8View => make_scalar_function(initcap_utf8view, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function initcap") } @@ -88,28 +89,41 @@ fn initcap(args: &[ArrayRef]) -> Result { // first map is the iterator, second is for the `Option<_>` let result = string_array .iter() - .map(|string| { - string.map(|string: &str| { - let mut char_vector = Vec::::new(); - let mut previous_character_letter_or_number = false; - for c in string.chars() { - if previous_character_letter_or_number { - char_vector.push(c.to_ascii_lowercase()); - } else { - char_vector.push(c.to_ascii_uppercase()); - } - previous_character_letter_or_number = c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit(); - } - char_vector.iter().collect::() - }) - }) + .map(initcap_string) .collect::>(); Ok(Arc::new(result) as ArrayRef) } +fn initcap_utf8view(args: &[ArrayRef]) -> Result { + let string_view_array = as_string_view_array(&args[0])?; + + let result = string_view_array + .iter() + .map(initcap_string) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} + +fn initcap_string(string: Option<&str>) -> Option { + let mut char_vector = Vec::::new(); + string.map(|string: &str| { + char_vector.clear(); + let mut previous_character_letter_or_number = false; + for c in string.chars() { + if previous_character_letter_or_number { + 
char_vector.push(c.to_ascii_lowercase()); + } else { + char_vector.push(c.to_ascii_uppercase()); + } + previous_character_letter_or_number = + c.is_ascii_uppercase() || c.is_ascii_lowercase() || c.is_ascii_digit(); + } + char_vector.iter().collect::() + }) +} + #[cfg(test)] mod tests { use crate::string::initcap::InitcapFunc; @@ -153,6 +167,44 @@ mod tests { Utf8, StringArray ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "hi THOMAS".to_string() + )))], + Ok(Some("Hi Thomas")), + &str, + Utf8, + StringArray + ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "hi THOMAS wIth M0re ThAN 12 ChaRs".to_string() + )))], + Ok(Some("Hi Thomas With M0re Than 12 Chars")), + &str, + Utf8, + StringArray + ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "".to_string() + )))], + Ok(Some("")), + &str, + Utf8, + StringArray + ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(None))], + Ok(None), + &str, + Utf8, + StringArray + ); Ok(()) } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e7166690580f9..a61e3830fd082 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -425,6 +425,50 @@ logical_plan 01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 02)--TableScan: test projection=[column1_utf8view] +### Initcap + +query TT +EXPLAIN SELECT + INITCAP(column1_utf8view) as c +FROM test; +---- +logical_plan +01)Projection: initcap(test.column1_utf8view) AS c +02)--TableScan: test projection=[column1_utf8view] + +# Create a table with lowercase strings +statement ok +CREATE TABLE test_lowercase AS SELECT + lower(column1_utf8) as column1_utf8_lower, + lower(column1_large_utf8) as column1_large_utf8_lower, + lower(column1_utf8view) as column1_utf8view_lower +FROM test; + +# Test INITCAP with utf8view, utf8, and largeutf8 +# Should not cast anything +query TT +EXPLAIN SELECT + INITCAP(column1_utf8view_lower) as c1, + INITCAP(column1_utf8_lower) as c2, + INITCAP(column1_large_utf8_lower) as c3 +FROM test_lowercase; +---- +logical_plan +01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1, initcap(test_lowercase.column1_utf8_lower) AS c2, initcap(test_lowercase.column1_large_utf8_lower) AS c3 +02)--TableScan: test_lowercase projection=[column1_utf8_lower, column1_large_utf8_lower, column1_utf8view_lower] + +query TTT +SELECT + INITCAP(column1_utf8view_lower) as c1, + INITCAP(column1_utf8_lower) as c2, + INITCAP(column1_large_utf8_lower) as c3 +FROM test_lowercase; +---- +Andrew Andrew Andrew +Xiangpeng Xiangpeng Xiangpeng +Raphael Raphael Raphael +NULL NULL NULL + # Ensure string functions use native StringView implementation # and do not fall back to Utf8 or LargeUtf8 # Should see no casts to Utf8 in the plans below @@ -586,18 +630,6 @@ logical_plan 02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] - -## Ensure no casts for INITCAP -## TODO https://github.com/apache/datafusion/issues/11853 -query TT -EXPLAIN SELECT - INITCAP(column1_utf8view) as c -FROM 
test; ----- -logical_plan -01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c -02)--TableScan: test projection=[column1_utf8view] - ## Ensure no casts for LEVENSHTEIN ## TODO https://github.com/apache/datafusion/issues/11854 query TT From 5251dc91ae18017dc205ee92bf32efaa95fd76b9 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Mon, 12 Aug 2024 03:31:42 -0700 Subject: [PATCH 270/357] Implement native support StringView for Octet Length (#11906) * Implement native support StringView for Octet Length Signed-off-by: Chojan Shang * Minor fix Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang --- .../functions/src/string/octet_length.rs | 35 ++++++++++++++++++- .../sqllogictest/test_files/string_view.slt | 3 +- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 12980fab1f118..f792914d862e4 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -43,7 +43,7 @@ impl OctetLengthFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } @@ -84,6 +84,9 @@ impl ScalarUDFImpl for OctetLengthFunc { ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)), )), + ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)), + )), _ => unreachable!(), }, } @@ -176,6 +179,36 @@ mod tests { Int32, Int32Array ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("joséjoséjoséjosé") + )))], + Ok(Some(20)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("josé") + )))], + Ok(Some(5)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("") + )))], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); Ok(()) } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index a61e3830fd082..7ceccf1af0e34 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -678,14 +678,13 @@ logical_plan ## Ensure no casts for OCTET_LENGTH -## TODO https://github.com/apache/datafusion/issues/11858 query TT EXPLAIN SELECT OCTET_LENGTH(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: octet_length(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for OVERLAY From 032b9c9c2d4dbbe7c65cc8122fb23258860ce148 Mon Sep 17 00:00:00 2001 From: Huaijin Date: Mon, 12 Aug 2024 18:33:58 +0800 Subject: [PATCH 271/357] fix: impl ordering for serialization/deserialization for AggregateUdf (#11926) * fix: support ordering and pencentile function ser/der * add more test case --- .../core/src/physical_optimizer/test_utils.rs | 1 - datafusion/core/src/physical_planner.rs | 1 - .../core/tests/fuzz_cases/window_fuzz.rs | 4 -- .../src/windows/bounded_window_agg_exec.rs | 6 +- datafusion/physical-plan/src/windows/mod.rs | 6 +- .../proto/src/physical_plan/from_proto.rs | 3 - datafusion/proto/src/physical_plan/mod.rs | 6 +- 
.../tests/cases/roundtrip_physical_plan.rs | 66 +++++++++++++++++++ 8 files changed, 71 insertions(+), 22 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 55a0fa8145527..90853c347672d 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -251,7 +251,6 @@ pub fn bounded_window_exec( "count".to_owned(), &[col(col_name, &schema).unwrap()], &[], - &[], &sort_exprs, Arc::new(WindowFrame::new(Some(false))), schema.as_ref(), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 7eb468f56eeb1..9cc2f253f8dad 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1510,7 +1510,6 @@ pub fn create_window_expr_with_name( fun, name, &physical_args, - args, &partition_by, &order_by, window_frame, diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 813862c4cc2f5..d75d8e43370d1 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -253,7 +253,6 @@ async fn bounded_window_causal_non_causal() -> Result<()> { let partitionby_exprs = vec![]; let orderby_exprs = vec![]; - let logical_exprs = vec![]; // Window frame starts with "UNBOUNDED PRECEDING": let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); @@ -285,7 +284,6 @@ async fn bounded_window_causal_non_causal() -> Result<()> { &window_fn, fn_name.to_string(), &args, - &logical_exprs, &partitionby_exprs, &orderby_exprs, Arc::new(window_frame), @@ -674,7 +672,6 @@ async fn run_window_test( &window_fn, fn_name.clone(), &args, - &[], &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), @@ -693,7 +690,6 @@ async fn run_window_test( &window_fn, fn_name, &args, - &[], &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 6311107f7b580..29ead35895fee 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -1196,7 +1196,7 @@ mod tests { RecordBatchStream, SendableRecordBatchStream, TaskContext, }; use datafusion_expr::{ - Expr, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::expressions::{col, Column, NthValue}; @@ -1303,10 +1303,7 @@ mod tests { let window_fn = WindowFunctionDefinition::AggregateUDF(count_udaf()); let col_expr = Arc::new(Column::new(schema.fields[0].name(), 0)) as Arc; - let log_expr = - Expr::Column(datafusion_common::Column::from(schema.fields[0].name())); let args = vec![col_expr]; - let log_args = vec![log_expr]; let partitionby_exprs = vec![col(hash, &schema)?]; let orderby_exprs = vec![PhysicalSortExpr { expr: col(order_by, &schema)?, @@ -1327,7 +1324,6 @@ mod tests { &window_fn, fn_name, &args, - &log_args, &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 2e6ad4e1a14fc..1fd0ca36b1eb9 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ 
b/datafusion/physical-plan/src/windows/mod.rs @@ -32,8 +32,8 @@ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ - BuiltInWindowFunction, Expr, PartitionEvaluator, WindowFrame, - WindowFunctionDefinition, WindowUDF, + BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, + WindowUDF, }; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ @@ -94,7 +94,6 @@ pub fn create_window_expr( fun: &WindowFunctionDefinition, name: String, args: &[Arc], - _logical_args: &[Expr], partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -746,7 +745,6 @@ mod tests { &[col("a", &schema)?], &[], &[], - &[], Arc::new(WindowFrame::new(None)), schema.as_ref(), false, diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index bc0a19336bae4..b2f92f4b2ee42 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -169,13 +169,10 @@ pub fn parse_physical_window_expr( // TODO: Remove extended_schema if functions are all UDAF let extended_schema = schema_add_window_field(&window_node_expr, input_schema, &fun, &name)?; - // approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. - let logical_exprs = &[]; create_window_expr( &fun, name, &window_node_expr, - logical_exprs, &partition_by, &order_by, Arc::new(window_frame), diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index b5d28f40a68f8..0f6722dd375b8 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -477,7 +477,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; - let _ordering_req: Vec = agg_node.ordering_req.iter() + let ordering_req: Vec = agg_node.ordering_req.iter() .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { @@ -487,14 +487,12 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { None => registry.udaf(udaf_name)? }; - // TODO: approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. 
- // TODO: `order by` is not supported for UDAF yet - // https://github.com/apache/datafusion/issues/11804 AggregateExprBuilder::new(agg_udf, input_phy_expr) .schema(Arc::clone(&physical_schema)) .alias(name) .with_ignore_nulls(agg_node.ignore_nulls) .with_distinct(agg_node.distinct) + .order_by(ordering_req) .build() } } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 1a9c6d40ebe6c..6766468ef443d 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -25,6 +25,8 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion_functions_aggregate::approx_percentile_cont::approx_percentile_cont_udaf; +use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::min_max::max_udaf; use prost::Message; @@ -412,6 +414,70 @@ fn rountrip_aggregate_with_limit() -> Result<()> { roundtrip_test(Arc::new(agg)) } +#[test] +fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { + let field_a = Field::new("a", DataType::Int64, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + let groups: Vec<(Arc, String)> = + vec![(col("a", &schema)?, "unused".to_string())]; + + let aggregates: Vec> = vec![AggregateExprBuilder::new( + approx_percentile_cont_udaf(), + vec![col("b", &schema)?, lit(0.5)], + ) + .schema(Arc::clone(&schema)) + .alias("APPROX_PERCENTILE_CONT(b, 0.5)") + .build()?]; + + let agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::new_single(groups.clone()), + aggregates.clone(), + vec![None], + Arc::new(EmptyExec::new(schema.clone())), + schema, + )?; + roundtrip_test(Arc::new(agg)) +} + +#[test] +fn rountrip_aggregate_with_sort() -> Result<()> { + let field_a = Field::new("a", DataType::Int64, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + let groups: Vec<(Arc, String)> = + vec![(col("a", &schema)?, "unused".to_string())]; + let sort_exprs = vec![PhysicalSortExpr { + expr: col("b", &schema)?, + options: SortOptions { + descending: false, + nulls_first: true, + }, + }]; + + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(array_agg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("ARRAY_AGG(b)") + .order_by(sort_exprs) + .build()?, + ]; + + let agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::new_single(groups.clone()), + aggregates.clone(), + vec![None], + Arc::new(EmptyExec::new(schema.clone())), + schema, + )?; + roundtrip_test(Arc::new(agg)) +} + #[test] fn roundtrip_aggregate_udaf() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); From 8deba021348c488a075b414a354c4a51af6b3582 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Mon, 12 Aug 2024 04:00:07 -0700 Subject: [PATCH 272/357] Implement native support StringView for Ends With (#11924) Signed-off-by: Chojan Shang --- datafusion/functions/src/string/ends_with.rs | 33 +++++++++---------- .../sqllogictest/test_files/string_view.slt | 6 ++-- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index b72cf0f66fa6a..03a1795954d03 100644 --- 
a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -18,12 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::array::ArrayRef; use arrow::datatypes::DataType; -use arrow::datatypes::DataType::Boolean; -use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; @@ -43,14 +41,15 @@ impl Default for EndsWithFunc { impl EndsWithFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), ], Volatility::Immutable, ), @@ -72,15 +71,16 @@ impl ScalarUDFImpl for EndsWithFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Boolean) + Ok(DataType::Boolean) } fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(ends_with::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(ends_with::, vec![])(args), + DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { + make_scalar_function(ends_with, vec![])(args) + } other => { - exec_err!("Unsupported data type {other:?} for function ends_with") + internal_err!("Unsupported data type {other:?} for function ends_with. Expected Utf8, LargeUtf8 or Utf8View")? } } } @@ -88,11 +88,8 @@ impl ScalarUDFImpl for EndsWithFunc { /// Returns true if string ends with suffix. 
/// ends_with('alphabet', 'abet') = 't' -pub fn ends_with(args: &[ArrayRef]) -> Result { - let left = as_generic_string_array::(&args[0])?; - let right = as_generic_string_array::(&args[1])?; - - let result = arrow::compute::kernels::comparison::ends_with(left, right)?; +pub fn ends_with(args: &[ArrayRef]) -> Result { + let result = arrow::compute::kernels::comparison::ends_with(&args[0], &args[1])?; Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 7ceccf1af0e34..e23c7d8bbf25d 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -618,7 +618,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for ENDS_WITH -## TODO https://github.com/apache/datafusion/issues/11852 query TT EXPLAIN SELECT ENDS_WITH(column1_utf8view, 'foo') as c1, @@ -626,9 +625,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2 -02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: ends_with(test.column1_utf8view, Utf8View("foo")) AS c1, ends_with(test.column2_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LEVENSHTEIN ## TODO https://github.com/apache/datafusion/issues/11854 From b60cdc787dcc9e9752846ad66de6922eca06695e Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Mon, 12 Aug 2024 04:00:54 -0700 Subject: [PATCH 273/357] Implement native support StringView for Levenshtein (#11925) * Implement native support StringView for Levenshtein Signed-off-by: Chojan Shang * Remove useless code Signed-off-by: Chojan Shang * Minor fix Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang --- .../functions/src/string/levenshtein.rs | 37 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 6 +-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index 3edf6de8c8631..430c402a50c54 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait}; use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_int_type}; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::utils::datafusion_strsim; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; @@ -42,10 +42,13 @@ impl Default for LevenshteinFunc { impl LevenshteinFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8])], + vec![ + Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), + ], Volatility::Immutable, ), } @@ -71,7 +74,9 @@ impl ScalarUDFImpl for LevenshteinFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => 
make_scalar_function(levenshtein::, vec![])(args), + DataType::Utf8View | DataType::Utf8 => { + make_scalar_function(levenshtein::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(levenshtein::, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function levenshtein") @@ -89,10 +94,26 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { args.len() ); } - let str1_array = as_generic_string_array::(&args[0])?; - let str2_array = as_generic_string_array::(&args[1])?; + match args[0].data_type() { + DataType::Utf8View => { + let str1_array = as_string_view_array(&args[0])?; + let str2_array = as_string_view_array(&args[1])?; + let result = str1_array + .iter() + .zip(str2_array.iter()) + .map(|(string1, string2)| match (string1, string2) { + (Some(string1), Some(string2)) => { + Some(datafusion_strsim::levenshtein(string1, string2) as i32) + } + _ => None, + }) + .collect::(); + Ok(Arc::new(result) as ArrayRef) + } DataType::Utf8 => { + let str1_array = as_generic_string_array::(&args[0])?; + let str2_array = as_generic_string_array::(&args[1])?; let result = str1_array .iter() .zip(str2_array.iter()) @@ -106,6 +127,8 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } DataType::LargeUtf8 => { + let str1_array = as_generic_string_array::(&args[0])?; + let str2_array = as_generic_string_array::(&args[1])?; let result = str1_array .iter() .zip(str2_array.iter()) @@ -120,7 +143,7 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { } other => { exec_err!( - "levenshtein was called with {other} datatype arguments. It requires Utf8 or LargeUtf8." + "levenshtein was called with {other} datatype arguments. It requires Utf8View, Utf8 or LargeUtf8." ) } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e23c7d8bbf25d..5edda9b804318 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -629,7 +629,6 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LEVENSHTEIN -## TODO https://github.com/apache/datafusion/issues/11854 query TT EXPLAIN SELECT levenshtein(column1_utf8view, 'foo') as c1, @@ -637,9 +636,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: levenshtein(__common_expr_1, Utf8("foo")) AS c1, levenshtein(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: levenshtein(test.column1_utf8view, Utf8View("foo")) AS c1, levenshtein(test.column1_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LOWER ## TODO https://github.com/apache/datafusion/issues/11855 From 34ec9d41faa8e73f37cd68f971985e851987bb3d Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:01:40 +0800 Subject: [PATCH 274/357] Implement native stringview support for BTRIM (#11920) * add utf8view support for generic_trim * add utf8view support for BTRIM * stop LTRIM and RTRIM from complaining generic_trim missing args * add tests to cover utf8view support of BTRIM * fix typo and tiny err * remove useless imports --- datafusion/functions/src/string/btrim.rs | 24 ++++-- datafusion/functions/src/string/common.rs | 78 
++++++++++++++++++- datafusion/functions/src/string/ltrim.rs | 2 +- datafusion/functions/src/string/rtrim.rs | 2 +- .../sqllogictest/test_files/string_view.slt | 37 ++++++++- 5 files changed, 131 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 349928d09664f..86470dd7a6469 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -16,9 +16,8 @@ // under the License. use arrow::array::{ArrayRef, OffsetSizeTrait}; -use std::any::Any; - use arrow::datatypes::DataType; +use std::any::Any; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; @@ -32,7 +31,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed. /// btrim('xyxtrimyyx', 'xyz') = 'trim' fn btrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Both) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Both, use_string_view) } #[derive(Debug)] @@ -52,7 +52,16 @@ impl BTrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + // Exact(vec![Utf8, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), aliases: vec![String::from("trim")], @@ -79,7 +88,7 @@ impl ScalarUDFImpl for BTrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( btrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -87,7 +96,10 @@ impl ScalarUDFImpl for BTrimFunc { btrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function btrim"), + other => exec_err!( + "Unsupported data type {other:?} for function btrim,\ + expected for Utf8, LargeUtf8 or Utf8View." 
+ ), } } diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index d36bd5cecc471..7037c1d1c3c3b 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -25,7 +25,7 @@ use arrow::array::{ use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::Result; use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::ColumnarValue; @@ -49,6 +49,7 @@ impl Display for TrimType { pub(crate) fn general_trim( args: &[ArrayRef], trim_type: TrimType, + use_string_view: bool, ) -> Result { let func = match trim_type { TrimType::Left => |input, pattern: &str| { @@ -68,6 +69,74 @@ pub(crate) fn general_trim( }, }; + if use_string_view { + string_view_trim::(trim_type, func, args) + } else { + string_trim::(trim_type, func, args) + } +} + +// removing 'a will cause compiler complaining lifetime of `func` +fn string_view_trim<'a, T: OffsetSizeTrait>( + trim_type: TrimType, + func: fn(&'a str, &'a str) -> &'a str, + args: &'a [ArrayRef], +) -> Result { + let string_array = as_string_view_array(&args[0])?; + + match args.len() { + 1 => { + let result = string_array + .iter() + .map(|string| string.map(|string: &str| func(string, " "))) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + 2 => { + let characters_array = as_string_view_array(&args[1])?; + + if characters_array.len() == 1 { + if characters_array.is_null(0) { + return Ok(new_null_array( + // The schema is expecting utf8 as null + &DataType::Utf8, + string_array.len(), + )); + } + + let characters = characters_array.value(0); + let result = string_array + .iter() + .map(|item| item.map(|string| func(string, characters))) + .collect::>(); + return Ok(Arc::new(result) as ArrayRef); + } + + let result = string_array + .iter() + .zip(characters_array.iter()) + .map(|(string, characters)| match (string, characters) { + (Some(string), Some(characters)) => Some(func(string, characters)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." + ) + } + } +} + +fn string_trim<'a, T: OffsetSizeTrait>( + trim_type: TrimType, + func: fn(&'a str, &'a str) -> &'a str, + args: &'a [ArrayRef], +) -> Result { let string_array = as_generic_string_array::(&args[0])?; match args.len() { @@ -84,7 +153,10 @@ pub(crate) fn general_trim( if characters_array.len() == 1 { if characters_array.is_null(0) { - return Ok(new_null_array(args[0].data_type(), args[0].len())); + return Ok(new_null_array( + string_array.data_type(), + string_array.len(), + )); } let characters = characters_array.value(0); @@ -109,7 +181,7 @@ pub(crate) fn general_trim( other => { exec_err!( "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." - ) + ) } } } diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index de14bbaa2bcf8..6a9fafdd9299a 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -32,7 +32,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed. 
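[Editor's note] The `string_view_trim` helper added above (and the analogous levenshtein change in the previous patch) follows one pattern: iterate the `StringViewArray` as `Option<&str>`, apply the string function, and collect the results without casting to `Utf8`. A minimal standalone sketch of that pattern, assuming only arrow's `StringViewArray` iterator/`FromIterator` APIs; the helper name `btrim_view` is illustrative and not part of the patch:

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, StringViewArray};

/// Trim every value of a string-view array against a set of characters,
/// preserving nulls and producing a new string-view array.
fn btrim_view(strings: &StringViewArray, trim_chars: &str) -> ArrayRef {
    let trimmed: StringViewArray = strings
        .iter()
        .map(|s| s.map(|v| v.trim_matches(|c| trim_chars.contains(c))))
        .collect();
    Arc::new(trimmed)
}

fn main() {
    let input =
        StringViewArray::from_iter(vec![Some("xyxtrimyyx"), None, Some("Andrew")]);
    let out = btrim_view(&input, "xyz");
    let out = out.as_any().downcast_ref::<StringViewArray>().unwrap();
    assert_eq!(out.value(0), "trim"); // matches the btrim doc example above
    assert!(out.is_null(1));
    assert_eq!(out.value(2), "Andrew");
}
```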
/// ltrim('zzzytest', 'xyz') = 'test' fn ltrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Left) + general_trim::(args, TrimType::Left, false) } #[derive(Debug)] diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index 2d29b50cb1739..50b626e3df0e9 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -32,7 +32,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. /// rtrim('testxxzx', 'xyz') = 'test' fn rtrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Right) + general_trim::(args, TrimType::Right, false) } #[derive(Debug)] diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 5edda9b804318..fcd71b7f7e943 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -563,15 +563,50 @@ SELECT 228 0 NULL ## Ensure no casts for BTRIM +# Test BTRIM with Utf8View input +query TT +EXPLAIN SELECT + BTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: btrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test BTRIM with Utf8View input and Utf8View pattern query TT EXPLAIN SELECT BTRIM(column1_utf8view, 'foo') AS l FROM test; ---- logical_plan -01)Projection: btrim(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS l +01)Projection: btrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test BTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + BTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: btrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l 02)--TableScan: test projection=[column1_utf8view] +# Test BTRIM outputs +query TTTT +SELECT + BTRIM(column1_utf8view, 'foo') AS l1, + BTRIM(column1_utf8view, 'A') AS l2, + BTRIM(column1_utf8view) AS l3, + BTRIM(column1_utf8view, NULL) AS l4 +FROM test; +---- +Andrew ndrew Andrew NULL +Xiangpeng Xiangpeng Xiangpeng NULL +Raphael Raphael Raphael NULL +NULL NULL NULL NULL + ## Ensure no casts for CHARACTER_LENGTH query TT EXPLAIN SELECT From 2cf09566af7d7d5f83a8bdff5f0adda97d40deee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 12 Aug 2024 21:46:07 +0800 Subject: [PATCH 275/357] Move `LimitPushdown` to physical-optimizer crate (#11945) * Move LimitPushdown to physical-optimizer crate * Add comment about test code location * asf header * Fix clippy --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/utils/mod.rs | 65 +- .../src/physical_optimizer/limit_pushdown.rs | 661 ------------------ datafusion/core/src/physical_optimizer/mod.rs | 1 - .../physical_optimizer/limit_pushdown.rs | 427 +++++++++++ .../core/tests/physical_optimizer/mod.rs | 1 + datafusion/optimizer/src/push_down_limit.rs | 64 +- datafusion/physical-optimizer/src/lib.rs | 1 + .../physical-optimizer/src/limit_pushdown.rs | 253 +++++++ .../src/output_requirements.rs | 2 + 9 files changed, 749 insertions(+), 726 deletions(-) delete mode 100644 datafusion/core/src/physical_optimizer/limit_pushdown.rs create mode 100644 datafusion/core/tests/physical_optimizer/limit_pushdown.rs create mode 100644 datafusion/physical-optimizer/src/limit_pushdown.rs diff --git 
a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 58dc8f40b5773..12e306ffaf031 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -37,7 +37,7 @@ use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; use std::borrow::{Borrow, Cow}; -use std::cmp::Ordering; +use std::cmp::{min, Ordering}; use std::collections::HashSet; use std::ops::Range; use std::sync::Arc; @@ -683,6 +683,69 @@ pub fn transpose(original: Vec>) -> Vec> { } } +/// Computes the `skip` and `fetch` parameters of a single limit that would be +/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters. +/// +/// There are multiple cases to consider: +/// +/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`). +/// +/// ```text +/// Before merging: +/// |........skip........|---fetch-->| Parent limit +/// |...child_skip...|---child_fetch-->| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |.........(child_skip + skip).........| +/// ``` +/// +/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`). +/// +/// Before merging: +/// ```text +/// |...skip...|------------fetch------------>| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |....(child_skip + skip)....|---(child_fetch - skip)-->| +/// ``` +/// +/// # Case 2: Parent is within child's range (`skip + fetch < child_fetch`). +/// +/// Before merging: +/// ```text +/// |...skip...|---fetch-->| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |....(child_skip + skip)....|---fetch-->| +/// ``` +pub fn combine_limit( + parent_skip: usize, + parent_fetch: Option, + child_skip: usize, + child_fetch: Option, +) -> (usize, Option) { + let combined_skip = child_skip.saturating_add(parent_skip); + + let combined_fetch = match (parent_fetch, child_fetch) { + (Some(parent_fetch), Some(child_fetch)) => { + Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip))) + } + (Some(parent_fetch), None) => Some(parent_fetch), + (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)), + (None, None) => None, + }; + + (combined_skip, combined_fetch) +} + #[cfg(test)] mod tests { use crate::ScalarValue::Null; diff --git a/datafusion/core/src/physical_optimizer/limit_pushdown.rs b/datafusion/core/src/physical_optimizer/limit_pushdown.rs deleted file mode 100644 index d02737ff09599..0000000000000 --- a/datafusion/core/src/physical_optimizer/limit_pushdown.rs +++ /dev/null @@ -1,661 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
[`LimitPushdown`] pushes `LIMIT` down through `ExecutionPlan`s to reduce -//! data transfer as much as possible. - -use std::fmt::Debug; -use std::sync::Arc; - -use crate::error::Result; -use crate::physical_optimizer::PhysicalOptimizerRule; -use crate::physical_plan::ExecutionPlan; - -use datafusion_common::config::ConfigOptions; -use datafusion_common::plan_datafusion_err; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_optimizer::push_down_limit::combine_limit; -use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; - -/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from -/// the parent to the child if applicable. -#[derive(Default)] -pub struct LimitPushdown {} - -impl LimitPushdown { - #[allow(missing_docs)] - pub fn new() -> Self { - Self {} - } -} - -impl PhysicalOptimizerRule for LimitPushdown { - fn optimize( - &self, - plan: Arc, - _config: &ConfigOptions, - ) -> Result> { - plan.transform_down(push_down_limits).data() - } - - fn name(&self) -> &str { - "LimitPushdown" - } - - fn schema_check(&self) -> bool { - true - } -} - -/// This enumeration makes `skip` and `fetch` calculations easier by providing -/// a single API for both local and global limit operators. -#[derive(Debug)] -enum LimitExec { - Global(GlobalLimitExec), - Local(LocalLimitExec), -} - -impl LimitExec { - fn input(&self) -> &Arc { - match self { - Self::Global(global) => global.input(), - Self::Local(local) => local.input(), - } - } - - fn fetch(&self) -> Option { - match self { - Self::Global(global) => global.fetch(), - Self::Local(local) => Some(local.fetch()), - } - } - - fn skip(&self) -> usize { - match self { - Self::Global(global) => global.skip(), - Self::Local(_) => 0, - } - } - - fn with_child(&self, child: Arc) -> Self { - match self { - Self::Global(global) => { - Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) - } - Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), - } - } -} - -impl From for Arc { - fn from(limit_exec: LimitExec) -> Self { - match limit_exec { - LimitExec::Global(global) => Arc::new(global), - LimitExec::Local(local) => Arc::new(local), - } - } -} - -/// Pushes down the limit through the plan. -pub fn push_down_limits( - plan: Arc, -) -> Result>> { - let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { - let child = limit_exec.input(); - if let Some(child_limit) = extract_limit(child) { - let merged = merge_limits(&limit_exec, &child_limit); - // Revisit current node in case of consecutive pushdowns - Some(push_down_limits(merged)?.data) - } else if child.supports_limit_pushdown() { - try_push_down_limit(&limit_exec, child.clone())? - } else { - add_fetch_to_child(&limit_exec, child.clone()) - } - } else { - None - }; - - Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) -} - -/// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a -/// [`GlobalLimitExec`] or a [`LocalLimitExec`]. 
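[Editor's note] The `combine_limit` helper moved into `datafusion_common::utils` above is pure arithmetic, so the three documented cases can be checked in isolation. The sketch below inlines the function body verbatim (copied from the hunk above) only so it compiles on its own; in the patch it is the exported `datafusion_common::utils::combine_limit`:

```rust
use std::cmp::min;

fn combine_limit(
    parent_skip: usize,
    parent_fetch: Option<usize>,
    child_skip: usize,
    child_fetch: Option<usize>,
) -> (usize, Option<usize>) {
    let combined_skip = child_skip.saturating_add(parent_skip);
    let combined_fetch = match (parent_fetch, child_fetch) {
        (Some(parent_fetch), Some(child_fetch)) => {
            Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
        }
        (Some(parent_fetch), None) => Some(parent_fetch),
        (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
        (None, None) => None,
    };
    (combined_skip, combined_fetch)
}

fn main() {
    // Case 0: parent skips past the child's whole window -> empty result (fetch = 0).
    assert_eq!(combine_limit(10, Some(5), 0, Some(8)), (10, Some(0)));
    // Case 1: parent window overlaps the tail of the child's window.
    assert_eq!(combine_limit(3, Some(10), 2, Some(8)), (5, Some(5)));
    // Case 2: parent window fits entirely inside the child's window.
    assert_eq!(combine_limit(1, Some(2), 0, Some(100)), (1, Some(2)));
}
```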
-fn extract_limit(plan: &Arc) -> Option { - if let Some(global_limit) = plan.as_any().downcast_ref::() { - Some(LimitExec::Global(GlobalLimitExec::new( - global_limit.input().clone(), - global_limit.skip(), - global_limit.fetch(), - ))) - } else { - plan.as_any() - .downcast_ref::() - .map(|local_limit| { - LimitExec::Local(LocalLimitExec::new( - local_limit.input().clone(), - local_limit.fetch(), - )) - }) - } -} - -/// Merge the limits of the parent and the child. If at least one of them is a -/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, -/// the result is a [`LocalLimitExec`]. -fn merge_limits( - parent_limit_exec: &LimitExec, - child_limit_exec: &LimitExec, -) -> Arc { - // We can use the logic in `combine_limit` from the logical optimizer: - let (skip, fetch) = combine_limit( - parent_limit_exec.skip(), - parent_limit_exec.fetch(), - child_limit_exec.skip(), - child_limit_exec.fetch(), - ); - match (parent_limit_exec, child_limit_exec) { - (LimitExec::Local(_), LimitExec::Local(_)) => { - // The fetch is present in this case, can unwrap. - Arc::new(LocalLimitExec::new( - child_limit_exec.input().clone(), - fetch.unwrap(), - )) - } - _ => Arc::new(GlobalLimitExec::new( - child_limit_exec.input().clone(), - skip, - fetch, - )), - } -} - -/// Pushes down the limit through the child. If the child has a single input -/// partition, simply swaps the parent and the child. Otherwise, adds a -/// [`LocalLimitExec`] after in between in addition to swapping, because of -/// multiple input partitions. -fn try_push_down_limit( - limit_exec: &LimitExec, - child: Arc, -) -> Result>> { - let grandchildren = child.children(); - if let Some(&grandchild) = grandchildren.first() { - // GlobalLimitExec and LocalLimitExec must have an input after pushdown - if combines_input_partitions(&child) { - // We still need a LocalLimitExec after the child - if let Some(fetch) = limit_exec.fetch() { - let new_local_limit = Arc::new(LocalLimitExec::new( - grandchild.clone(), - fetch + limit_exec.skip(), - )); - let new_child = child.clone().with_new_children(vec![new_local_limit])?; - Ok(Some(limit_exec.with_child(new_child).into())) - } else { - Ok(None) - } - } else { - // Swap current with child - let new_limit = limit_exec.with_child(grandchild.clone()); - let new_child = child.clone().with_new_children(vec![new_limit.into()])?; - Ok(Some(new_child)) - } - } else { - // Operators supporting limit push down must have a child. - Err(plan_datafusion_err!( - "{:#?} must have a child to push down limit", - child - )) - } -} - -fn combines_input_partitions(exec: &Arc) -> bool { - let exec = exec.as_any(); - exec.is::() || exec.is::() -} - -/// Transforms child to the fetching version if supported. Removes the parent if -/// skip is zero. Otherwise, keeps the parent. 
-fn add_fetch_to_child( - limit_exec: &LimitExec, - child: Arc, -) -> Option> { - let fetch = limit_exec.fetch(); - let skip = limit_exec.skip(); - - let child_fetch = fetch.map(|f| f + skip); - - if let Some(child_with_fetch) = child.with_fetch(child_fetch) { - if skip > 0 { - Some(limit_exec.with_child(child_with_fetch).into()) - } else { - Some(child_with_fetch) - } - } else { - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use arrow_schema::{DataType, Field, Schema, SchemaRef}; - use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::BinaryExpr; - use datafusion_physical_expr::expressions::{col, lit}; - use datafusion_physical_expr::Partitioning; - use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; - use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; - use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::filter::FilterExec; - use datafusion_physical_plan::get_plan_string; - use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; - use datafusion_physical_plan::projection::ProjectionExec; - use datafusion_physical_plan::repartition::RepartitionExec; - use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; - - struct DummyStreamPartition { - schema: SchemaRef, - } - impl PartitionStream for DummyStreamPartition { - fn schema(&self) -> &SchemaRef { - &self.schema - } - fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { - unreachable!() - } - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let global_limit = global_limit_exec(streaming_table, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let global_limit = global_limit_exec(streaming_table, 2, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let repartition = 
repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; - let coalesce_batches = coalesce_batches_exec(filter); - let local_limit = local_limit_exec(coalesce_batches, 5); - let coalesce_partitions = coalesce_partitions_exec(local_limit); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let filter = filter_exec(schema.clone(), streaming_table)?; - let projection = projection_exec(schema.clone(), filter)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " GlobalLimitExec: skip=0, fetch=5", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone()).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema.clone(), coalesce_batches)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: partition_sizes=1, 
projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; - let coalesce_partitions = coalesce_partitions_exec(filter); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_local_limit = local_limit_exec(empty_exec, 10); - let parent_local_limit = local_limit_exec(child_local_limit, 20); - - let initial = get_plan_string(&parent_local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " LocalLimitExec: fetch=10", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; - - let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); - let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); - - let initial = get_plan_string(&parent_global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=10, fetch=20", - " GlobalLimitExec: skip=10, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let local_limit = local_limit_exec(empty_exec, 40); - let global_limit = global_limit_exec(local_limit, 20, Some(30)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=20, fetch=30", - " LocalLimitExec: fetch=40", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, 
fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let global_limit = global_limit_exec(empty_exec, 20, Some(30)); - let local_limit = local_limit_exec(global_limit, 20); - - let initial = get_plan_string(&local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " GlobalLimitExec: skip=20, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - fn create_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Int32, true), - Field::new("c2", DataType::Int32, true), - Field::new("c3", DataType::Int32, true), - ])) - } - - fn streaming_table_exec(schema: SchemaRef) -> Result> { - Ok(Arc::new(StreamingTableExec::try_new( - schema.clone(), - vec![Arc::new(DummyStreamPartition { - schema: schema.clone(), - }) as _], - None, - None, - true, - None, - )?)) - } - - fn global_limit_exec( - input: Arc, - skip: usize, - fetch: Option, - ) -> Arc { - Arc::new(GlobalLimitExec::new(input, skip, fetch)) - } - - fn local_limit_exec( - input: Arc, - fetch: usize, - ) -> Arc { - Arc::new(LocalLimitExec::new(input, fetch)) - } - - fn projection_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(ProjectionExec::try_new( - vec![ - (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), - (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), - (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), - ], - input, - )?)) - } - - fn filter_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(FilterExec::try_new( - Arc::new(BinaryExpr::new( - col("c3", schema.as_ref()).unwrap(), - Operator::Gt, - lit(0), - )), - input, - )?)) - } - - fn coalesce_batches_exec(input: Arc) -> Arc { - Arc::new(CoalesceBatchesExec::new(input, 8192)) - } - - fn coalesce_partitions_exec( - local_limit: Arc, - ) -> Arc { - Arc::new(CoalescePartitionsExec::new(local_limit)) - } - - fn repartition_exec( - streaming_table: Arc, - ) -> Result> { - Ok(Arc::new(RepartitionExec::try_new( - streaming_table, - Partitioning::RoundRobinBatch(8), - )?)) - } - - fn empty_exec(schema: SchemaRef) -> Arc { - Arc::new(EmptyExec::new(schema)) - } -} diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 9291d0b848653..0e68a05d855c7 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -26,7 +26,6 @@ pub mod combine_partial_final_agg; pub mod enforce_distribution; pub mod enforce_sorting; pub mod join_selection; -pub mod limit_pushdown; pub mod limited_distinct_aggregation; pub mod optimizer; pub mod projection_pushdown; diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs new file mode 100644 index 0000000000000..8f3a47c95e9d2 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -0,0 +1,427 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; +use datafusion_common::config::ConfigOptions; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::Operator; +use datafusion_physical_expr::expressions::BinaryExpr; +use datafusion_physical_expr::expressions::{col, lit}; +use datafusion_physical_expr::Partitioning; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::empty::EmptyExec; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; +use std::sync::Arc; + +struct DummyStreamPartition { + schema: SchemaRef, +} +impl PartitionStream for DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 2, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let 
expected = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_batches = coalesce_batches_exec(filter); + let local_limit = local_limit_exec(coalesce_batches, 5); + let coalesce_partitions = coalesce_partitions_exec(local_limit); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec() -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let filter = filter_exec(schema.clone(), streaming_table)?; + let projection = projection_exec(schema.clone(), filter)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " GlobalLimitExec: skip=0, fetch=5", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone()).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema.clone(), coalesce_batches)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, 
c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_partitions = coalesce_partitions_exec(filter); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_local_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_local_limit = local_limit_exec(empty_exec, 10); + let parent_local_limit = local_limit_exec(child_local_limit, 20); + + let initial = get_plan_string(&parent_local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " LocalLimitExec: fetch=10", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + + let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_global_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); + let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); + + let initial = get_plan_string(&parent_global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=10, fetch=20", + " GlobalLimitExec: skip=10, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_local_limit() -> 
datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let local_limit = local_limit_exec(empty_exec, 40); + let global_limit = global_limit_exec(local_limit, 20, Some(30)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=20, fetch=30", + " LocalLimitExec: fetch=40", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_global_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let global_limit = global_limit_exec(empty_exec, 20, Some(30)); + let local_limit = local_limit_exec(global_limit, 20); + + let initial = get_plan_string(&local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " GlobalLimitExec: skip=20, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +fn create_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, true), + Field::new("c3", DataType::Int32, true), + ])) +} + +fn streaming_table_exec( + schema: SchemaRef, +) -> datafusion_common::Result> { + Ok(Arc::new(StreamingTableExec::try_new( + schema.clone(), + vec![Arc::new(DummyStreamPartition { + schema: schema.clone(), + }) as _], + None, + None, + true, + None, + )?)) +} + +fn global_limit_exec( + input: Arc, + skip: usize, + fetch: Option, +) -> Arc { + Arc::new(GlobalLimitExec::new(input, skip, fetch)) +} + +fn local_limit_exec( + input: Arc, + fetch: usize, +) -> Arc { + Arc::new(LocalLimitExec::new(input, fetch)) +} + +fn projection_exec( + schema: SchemaRef, + input: Arc, +) -> datafusion_common::Result> { + Ok(Arc::new(ProjectionExec::try_new( + vec![ + (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), + (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), + (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), + ], + input, + )?)) +} + +fn filter_exec( + schema: SchemaRef, + input: Arc, +) -> datafusion_common::Result> { + Ok(Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("c3", schema.as_ref()).unwrap(), + Operator::Gt, + lit(0), + )), + input, + )?)) +} + +fn coalesce_batches_exec(input: Arc) -> Arc { + Arc::new(CoalesceBatchesExec::new(input, 8192)) +} + +fn coalesce_partitions_exec( + local_limit: Arc, +) -> Arc { + Arc::new(CoalescePartitionsExec::new(local_limit)) +} + +fn repartition_exec( + streaming_table: Arc, +) -> datafusion_common::Result> { + Ok(Arc::new(RepartitionExec::try_new( + streaming_table, + Partitioning::RoundRobinBatch(8), + )?)) +} + +fn empty_exec(schema: SchemaRef) -> Arc { + Arc::new(EmptyExec::new(schema)) +} diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 0ee89a3d213c4..904a8b9fbb380 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -16,3 +16,4 @@ // under the License. 
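[Editor's note] Outside of these relocated tests, the rule can now be driven through the standalone crate alone. A hedged usage sketch, relying only on the `pub mod limit_pushdown;` export added in the `lib.rs` hunk just below; the `apply_limit_pushdown` wrapper is illustrative, and `plan` is assumed to be any physical plan built elsewhere (for example one of the plans constructed by the tests above):

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_physical_optimizer::limit_pushdown::LimitPushdown;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::{get_plan_string, ExecutionPlan};

/// Apply the relocated rule to an existing physical plan and print the result,
/// the same way the tests above assert on `get_plan_string` output.
fn apply_limit_pushdown(
    plan: Arc<dyn ExecutionPlan>,
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
    let optimized = LimitPushdown::new().optimize(plan, &ConfigOptions::new())?;
    println!("{}", get_plan_string(&optimized).join("\n"));
    Ok(optimized)
}
```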
mod aggregate_statistics; +mod limit_pushdown; diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 612aac1d152d7..4d8f1dbdb9558 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -24,6 +24,7 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; +use datafusion_common::utils::combine_limit; use datafusion_common::Result; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; @@ -217,69 +218,6 @@ fn transformed_limit( }))) } -/// Computes the `skip` and `fetch` parameters of a single limit that would be -/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters. -/// -/// There are multiple cases to consider: -/// -/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`). -/// -/// ```text -/// Before merging: -/// |........skip........|---fetch-->| Parent limit -/// |...child_skip...|---child_fetch-->| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |.........(child_skip + skip).........| -/// ``` -/// -/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`). -/// -/// Before merging: -/// ```text -/// |...skip...|------------fetch------------>| Parent limit -/// |...child_skip...|-------------child_fetch------------>| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |....(child_skip + skip)....|---(child_fetch - skip)-->| -/// ``` -/// -/// # Case 2: Parent is within child's range (`skip + fetch < child_fetch`). -/// -/// Before merging: -/// ```text -/// |...skip...|---fetch-->| Parent limit -/// |...child_skip...|-------------child_fetch------------>| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |....(child_skip + skip)....|---fetch-->| -/// ``` -pub fn combine_limit( - parent_skip: usize, - parent_fetch: Option, - child_skip: usize, - child_fetch: Option, -) -> (usize, Option) { - let combined_skip = child_skip.saturating_add(parent_skip); - - let combined_fetch = match (parent_fetch, child_fetch) { - (Some(parent_fetch), Some(child_fetch)) => { - Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip))) - } - (Some(parent_fetch), None) => Some(parent_fetch), - (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)), - (None, None) => None, - }; - - (combined_skip, combined_fetch) -} - /// Adds a limit to the inputs of a join, if possible fn push_down_join(mut join: Join, limit: usize) -> Transformed { use JoinType::*; diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index 8108493a0d3b6..d54e6dbcab8fc 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -18,6 +18,7 @@ #![deny(clippy::clone_on_ref_ptr)] pub mod aggregate_statistics; +pub mod limit_pushdown; mod optimizer; pub mod output_requirements; diff --git a/datafusion/physical-optimizer/src/limit_pushdown.rs b/datafusion/physical-optimizer/src/limit_pushdown.rs new file mode 100644 index 0000000000000..2b787980585a5 --- /dev/null +++ b/datafusion/physical-optimizer/src/limit_pushdown.rs @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`LimitPushdown`] pushes `LIMIT` down through `ExecutionPlan`s to reduce +//! data transfer as much as possible. + +use std::fmt::Debug; +use std::sync::Arc; + +use crate::PhysicalOptimizerRule; +use datafusion_common::config::ConfigOptions; +use datafusion_common::plan_datafusion_err; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::utils::combine_limit; +use datafusion_common::Result; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::ExecutionPlan; + +/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from +/// the parent to the child if applicable. +#[derive(Default)] +pub struct LimitPushdown {} + +impl LimitPushdown { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl PhysicalOptimizerRule for LimitPushdown { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result> { + plan.transform_down(push_down_limits).data() + } + + fn name(&self) -> &str { + "LimitPushdown" + } + + fn schema_check(&self) -> bool { + true + } +} + +/// This enumeration makes `skip` and `fetch` calculations easier by providing +/// a single API for both local and global limit operators. +#[derive(Debug)] +enum LimitExec { + Global(GlobalLimitExec), + Local(LocalLimitExec), +} + +impl LimitExec { + fn input(&self) -> &Arc { + match self { + Self::Global(global) => global.input(), + Self::Local(local) => local.input(), + } + } + + fn fetch(&self) -> Option { + match self { + Self::Global(global) => global.fetch(), + Self::Local(local) => Some(local.fetch()), + } + } + + fn skip(&self) -> usize { + match self { + Self::Global(global) => global.skip(), + Self::Local(_) => 0, + } + } + + fn with_child(&self, child: Arc) -> Self { + match self { + Self::Global(global) => { + Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) + } + Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), + } + } +} + +impl From for Arc { + fn from(limit_exec: LimitExec) -> Self { + match limit_exec { + LimitExec::Global(global) => Arc::new(global), + LimitExec::Local(local) => Arc::new(local), + } + } +} + +/// Pushes down the limit through the plan. +pub fn push_down_limits( + plan: Arc, +) -> Result>> { + let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { + let child = limit_exec.input(); + if let Some(child_limit) = extract_limit(child) { + let merged = merge_limits(&limit_exec, &child_limit); + // Revisit current node in case of consecutive pushdowns + Some(push_down_limits(merged)?.data) + } else if child.supports_limit_pushdown() { + try_push_down_limit(&limit_exec, Arc::clone(child))? 
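[Editor's note] One mechanical difference from the deleted core copy of this file is that `child.clone()` becomes `Arc::clone(child)`, because the destination crate denies `clippy::clone_on_ref_ptr` (visible in the `lib.rs` hunk above). A tiny standalone illustration of what that lint distinguishes:

```rust
use std::sync::Arc;

fn main() {
    let plan: Arc<String> = Arc::new("GlobalLimitExec".to_string());

    // Explicit form: clearly bumps only the reference count, never deep-copies
    // the pointee. This is the style the relocated code uses.
    let shared = Arc::clone(&plan);

    // `plan.clone()` compiles to the same thing, but the lint rejects it because
    // it reads like a (potentially expensive) clone of the underlying value.
    assert!(Arc::ptr_eq(&plan, &shared));
}
```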
+ } else { + add_fetch_to_child(&limit_exec, Arc::clone(child)) + } + } else { + None + }; + + Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) +} + +/// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a +/// [`GlobalLimitExec`] or a [`LocalLimitExec`]. +fn extract_limit(plan: &Arc) -> Option { + if let Some(global_limit) = plan.as_any().downcast_ref::() { + Some(LimitExec::Global(GlobalLimitExec::new( + Arc::clone(global_limit.input()), + global_limit.skip(), + global_limit.fetch(), + ))) + } else { + plan.as_any() + .downcast_ref::() + .map(|local_limit| { + LimitExec::Local(LocalLimitExec::new( + Arc::clone(local_limit.input()), + local_limit.fetch(), + )) + }) + } +} + +/// Merge the limits of the parent and the child. If at least one of them is a +/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, +/// the result is a [`LocalLimitExec`]. +fn merge_limits( + parent_limit_exec: &LimitExec, + child_limit_exec: &LimitExec, +) -> Arc { + // We can use the logic in `combine_limit` from the logical optimizer: + let (skip, fetch) = combine_limit( + parent_limit_exec.skip(), + parent_limit_exec.fetch(), + child_limit_exec.skip(), + child_limit_exec.fetch(), + ); + match (parent_limit_exec, child_limit_exec) { + (LimitExec::Local(_), LimitExec::Local(_)) => { + // The fetch is present in this case, can unwrap. + Arc::new(LocalLimitExec::new( + Arc::clone(child_limit_exec.input()), + fetch.unwrap(), + )) + } + _ => Arc::new(GlobalLimitExec::new( + Arc::clone(child_limit_exec.input()), + skip, + fetch, + )), + } +} + +/// Pushes down the limit through the child. If the child has a single input +/// partition, simply swaps the parent and the child. Otherwise, adds a +/// [`LocalLimitExec`] after in between in addition to swapping, because of +/// multiple input partitions. +fn try_push_down_limit( + limit_exec: &LimitExec, + child: Arc, +) -> Result>> { + let grandchildren = child.children(); + if let Some(&grandchild) = grandchildren.first() { + // GlobalLimitExec and LocalLimitExec must have an input after pushdown + if combines_input_partitions(&child) { + // We still need a LocalLimitExec after the child + if let Some(fetch) = limit_exec.fetch() { + let new_local_limit = Arc::new(LocalLimitExec::new( + Arc::clone(grandchild), + fetch + limit_exec.skip(), + )); + let new_child = + Arc::clone(&child).with_new_children(vec![new_local_limit])?; + Ok(Some(limit_exec.with_child(new_child).into())) + } else { + Ok(None) + } + } else { + // Swap current with child + let new_limit = limit_exec.with_child(Arc::clone(grandchild)); + let new_child = child.with_new_children(vec![new_limit.into()])?; + Ok(Some(new_child)) + } + } else { + // Operators supporting limit push down must have a child. + Err(plan_datafusion_err!( + "{:#?} must have a child to push down limit", + child + )) + } +} + +fn combines_input_partitions(exec: &Arc) -> bool { + let exec = exec.as_any(); + exec.is::() || exec.is::() +} + +/// Transforms child to the fetching version if supported. Removes the parent if +/// skip is zero. Otherwise, keeps the parent. 
+fn add_fetch_to_child( + limit_exec: &LimitExec, + child: Arc, +) -> Option> { + let fetch = limit_exec.fetch(); + let skip = limit_exec.skip(); + + let child_fetch = fetch.map(|f| f + skip); + + if let Some(child_with_fetch) = child.with_fetch(child_fetch) { + if skip > 0 { + Some(limit_exec.with_child(child_with_fetch).into()) + } else { + Some(child_with_fetch) + } + } else { + None + } +} + +// See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index f971d8f1f0aaf..fdfdd349e36e7 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -286,3 +286,5 @@ fn require_top_ordering_helper( Ok((plan, false)) } } + +// See tests in datafusion/core/tests/physical_optimizer From e66636d3652de00faf3c4c0e566d05528124baaf Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 12 Aug 2024 09:58:18 -0400 Subject: [PATCH 276/357] Minor: Improve comments in row_hash.rs for skipping aggregation (#11820) * Improve comments in row_hash.rs for skipping aggregation * Update datafusion/physical-plan/src/aggregates/row_hash.rs Co-authored-by: Andy Grove * Apply suggestions from code review Co-authored-by: Eduard Karacharov --------- Co-authored-by: Andy Grove Co-authored-by: Eduard Karacharov --- .../physical-plan/src/aggregates/row_hash.rs | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index ed3d6d49f9f3d..b3221752d0349 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -100,22 +100,24 @@ struct SpillState { /// /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] struct SkipAggregationProbe { - /// Number of processed input rows + /// Number of processed input rows (updated during probing) input_rows: usize, - /// Number of total group values for `input_rows` + /// Number of total group values for `input_rows` (updated during probing) num_groups: usize, - /// Aggregation ratio check should be performed only when the - /// number of input rows exceeds this threshold + /// Aggregation ratio check performed when the number of input rows exceeds + /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, - /// Maximum allowed value of `input_rows` / `num_groups` to - /// continue aggregation + /// Maximum ratio of `num_groups` to `input_rows` for continuing aggregation + /// (from `SessionConfig`). If the ratio exceeds this value, aggregation + /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, - /// Flag indicating that further data aggregation mey be skipped + /// Flag indicating further data aggregation may be skipped (decision made + /// when probing complete) should_skip: bool, - /// Flag indicating that further updates of `SkipAggregationProbe` - /// state won't make any effect + /// Flag indicating further updates of `SkipAggregationProbe` state won't + /// make any effect (set either while probing or on probing completion) is_locked: bool, /// Number of rows where state was output without aggregation. 
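[Editor's note] The probe described by these comments reduces to a ratio test once enough input has been seen: only decide after `input_rows` passes `probe_rows_threshold`, then skip partial aggregation when `num_groups / input_rows` is too high. A standalone sketch of that decision; the function name and the exact `>=` comparison are illustrative assumptions, and the real state lives inside `SkipAggregationProbe`:

```rust
/// Decide whether the partial aggregation stage should stop aggregating and
/// pass rows through, based on the fields documented above.
fn should_skip_partial_aggregation(
    input_rows: usize,
    num_groups: usize,
    probe_rows_threshold: usize,
    probe_ratio_threshold: f64,
) -> bool {
    if input_rows < probe_rows_threshold {
        // Not enough data seen yet to make a decision.
        return false;
    }
    (num_groups as f64) / (input_rows as f64) >= probe_ratio_threshold
}

fn main() {
    // Nearly every row is its own group: aggregation is not reducing the data,
    // so the partial stage should emit rows directly.
    assert!(should_skip_partial_aggregation(100_000, 95_000, 100_000, 0.8));
    // Heavy grouping: keep aggregating.
    assert!(!should_skip_partial_aggregation(100_000, 1_000, 100_000, 0.8));
}
```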
From 18193e6224603c92ce1ab16136ffcd926ca267b5 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Mon, 12 Aug 2024 19:56:47 +0530 Subject: [PATCH 277/357] chore: Add SessionState to MockContextProvider just like SessionContextProvider (#11940) * refac: mock context provide to match public api * lower udaf names * cleanup * typos Co-authored-by: Jay Zhan * more typos Co-authored-by: Jay Zhan * typos * refactor func name --------- Co-authored-by: Jay Zhan --- datafusion/sql/tests/cases/plan_to_sql.rs | 40 ++++++++++------- datafusion/sql/tests/common/mod.rs | 52 ++++++++++++----------- datafusion/sql/tests/sql_integration.rs | 52 +++++++++++++---------- 3 files changed, 83 insertions(+), 61 deletions(-) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 179fc108e6d20..ed23fada0cfb9 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -33,7 +33,7 @@ use datafusion_functions::core::planner::CoreFunctionPlanner; use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; use sqlparser::parser::Parser; -use crate::common::MockContextProvider; +use crate::common::{MockContextProvider, MockSessionState}; #[test] fn roundtrip_expr() { @@ -59,8 +59,8 @@ fn roundtrip_expr() { let roundtrip = |table, sql: &str| -> Result { let dialect = GenericDialect {}; let sql_expr = Parser::new(&dialect).try_with_sql(sql)?.parse_expr()?; - - let context = MockContextProvider::default().with_udaf(sum_udaf()); + let state = MockSessionState::default().with_aggregate_function(sum_udaf()); + let context = MockContextProvider { state }; let schema = context.get_table_source(table)?.schema(); let df_schema = DFSchema::try_from(schema.as_ref().clone())?; let sql_to_rel = SqlToRel::new(&context); @@ -156,11 +156,11 @@ fn roundtrip_statement() -> Result<()> { let statement = Parser::new(&dialect) .try_with_sql(query)? .parse_statement()?; - - let context = MockContextProvider::default() - .with_udaf(sum_udaf()) - .with_udaf(count_udaf()) + let state = MockSessionState::default() + .with_aggregate_function(sum_udaf()) + .with_aggregate_function(count_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -189,8 +189,10 @@ fn roundtrip_crossjoin() -> Result<()> { .try_with_sql(query)? .parse_statement()?; - let context = MockContextProvider::default() + let state = MockSessionState::default() .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -412,10 +414,12 @@ fn roundtrip_statement_with_dialect() -> Result<()> { .try_with_sql(query.sql)? .parse_statement()?; - let context = MockContextProvider::default() - .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) - .with_udaf(max_udaf()) - .with_udaf(min_udaf()); + let state = MockSessionState::default() + .with_aggregate_function(max_udaf()) + .with_aggregate_function(min_udaf()) + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel .sql_statement_to_plan(statement) @@ -443,7 +447,9 @@ fn test_unnest_logical_plan() -> Result<()> { .try_with_sql(query)? 
.parse_statement()?; - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -516,7 +522,9 @@ fn test_pretty_roundtrip() -> Result<()> { let df_schema = DFSchema::try_from(schema)?; - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let unparser = Unparser::default().with_pretty(true); @@ -589,7 +597,9 @@ fn sql_round_trip(query: &str, expect: &str) { .parse_statement() .unwrap(); - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index 374aa9db6714a..fe0e5f7283a47 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -50,36 +50,40 @@ impl Display for MockCsvType { } #[derive(Default)] -pub(crate) struct MockContextProvider { - options: ConfigOptions, - udfs: HashMap>, - udafs: HashMap>, +pub(crate) struct MockSessionState { + scalar_functions: HashMap>, + aggregate_functions: HashMap>, expr_planners: Vec>, + pub config_options: ConfigOptions, } -impl MockContextProvider { - // Suppressing dead code warning, as this is used in integration test crates - #[allow(dead_code)] - pub(crate) fn options_mut(&mut self) -> &mut ConfigOptions { - &mut self.options +impl MockSessionState { + pub fn with_expr_planner(mut self, expr_planner: Arc) -> Self { + self.expr_planners.push(expr_planner); + self } - #[allow(dead_code)] - pub(crate) fn with_udf(mut self, udf: ScalarUDF) -> Self { - self.udfs.insert(udf.name().to_string(), Arc::new(udf)); + pub fn with_scalar_function(mut self, scalar_function: Arc) -> Self { + self.scalar_functions + .insert(scalar_function.name().to_string(), scalar_function); self } - pub(crate) fn with_udaf(mut self, udaf: Arc) -> Self { + pub fn with_aggregate_function( + mut self, + aggregate_function: Arc, + ) -> Self { // TODO: change to to_string() if all the function name is converted to lowercase - self.udafs.insert(udaf.name().to_lowercase(), udaf); + self.aggregate_functions.insert( + aggregate_function.name().to_string().to_lowercase(), + aggregate_function, + ); self } +} - pub(crate) fn with_expr_planner(mut self, planner: Arc) -> Self { - self.expr_planners.push(planner); - self - } +pub(crate) struct MockContextProvider { + pub(crate) state: MockSessionState, } impl ContextProvider for MockContextProvider { @@ -202,11 +206,11 @@ impl ContextProvider for MockContextProvider { } fn get_function_meta(&self, name: &str) -> Option> { - self.udfs.get(name).cloned() + self.state.scalar_functions.get(name).cloned() } fn get_aggregate_meta(&self, name: &str) -> Option> { - self.udafs.get(name).cloned() + self.state.aggregate_functions.get(name).cloned() } fn get_variable_type(&self, _: &[String]) -> Option { @@ -218,7 +222,7 @@ impl ContextProvider for MockContextProvider { } fn options(&self) -> &ConfigOptions { - &self.options + &self.state.config_options } fn get_file_type( @@ -237,11 +241,11 @@ impl ContextProvider for MockContextProvider { } fn udf_names(&self) -> Vec { - self.udfs.keys().cloned().collect() + self.state.scalar_functions.keys().cloned().collect() } fn 
udaf_names(&self) -> Vec { - self.udafs.keys().cloned().collect() + self.state.aggregate_functions.keys().cloned().collect() } fn udwf_names(&self) -> Vec { @@ -249,7 +253,7 @@ impl ContextProvider for MockContextProvider { } fn get_expr_planners(&self) -> &[Arc] { - &self.expr_planners + &self.state.expr_planners } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 4d7e60805657c..5a0317c47c85a 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -41,6 +41,7 @@ use datafusion_sql::{ planner::{ParserOptions, SqlToRel}, }; +use crate::common::MockSessionState; use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::{ approx_median::approx_median_udaf, count::count_udaf, min_max::max_udaf, @@ -1495,8 +1496,9 @@ fn recursive_ctes_disabled() { select * from numbers;"; // manually setting up test here so that we can disable recursive ctes - let mut context = MockContextProvider::default(); - context.options_mut().execution.enable_recursive_ctes = false; + let mut state = MockSessionState::default(); + state.config_options.execution.enable_recursive_ctes = false; + let context = MockContextProvider { state }; let planner = SqlToRel::new_with_options(&context, ParserOptions::default()); let result = DFParser::parse_sql_with_dialect(sql, &GenericDialect {}); @@ -2727,7 +2729,8 @@ fn logical_plan_with_options(sql: &str, options: ParserOptions) -> Result Result { - let context = MockContextProvider::default().with_udaf(sum_udaf()); + let state = MockSessionState::default().with_aggregate_function(sum_udaf()); + let context = MockContextProvider { state }; let planner = SqlToRel::new(&context); let result = DFParser::parse_sql_with_dialect(sql, dialect); let mut ast = result?; @@ -2739,39 +2742,44 @@ fn logical_plan_with_dialect_and_options( dialect: &dyn Dialect, options: ParserOptions, ) -> Result { - let context = MockContextProvider::default() - .with_udf(unicode::character_length().as_ref().clone()) - .with_udf(string::concat().as_ref().clone()) - .with_udf(make_udf( + let state = MockSessionState::default() + .with_scalar_function(Arc::new(unicode::character_length().as_ref().clone())) + .with_scalar_function(Arc::new(string::concat().as_ref().clone())) + .with_scalar_function(Arc::new(make_udf( "nullif", vec![DataType::Int32, DataType::Int32], DataType::Int32, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "round", vec![DataType::Float64, DataType::Int64], DataType::Float32, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "arrow_cast", vec![DataType::Int64, DataType::Utf8], DataType::Float64, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "date_trunc", vec![DataType::Utf8, DataType::Timestamp(Nanosecond, None)], DataType::Int32, - )) - .with_udf(make_udf("sqrt", vec![DataType::Int64], DataType::Int64)) - .with_udaf(sum_udaf()) - .with_udaf(approx_median_udaf()) - .with_udaf(count_udaf()) - .with_udaf(avg_udaf()) - .with_udaf(min_udaf()) - .with_udaf(max_udaf()) - .with_udaf(grouping_udaf()) + ))) + .with_scalar_function(Arc::new(make_udf( + "sqrt", + vec![DataType::Int64], + DataType::Int64, + ))) + .with_aggregate_function(sum_udaf()) + .with_aggregate_function(approx_median_udaf()) + .with_aggregate_function(count_udaf()) + .with_aggregate_function(avg_udaf()) + .with_aggregate_function(min_udaf()) + .with_aggregate_function(max_udaf()) + 
.with_aggregate_function(grouping_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + let context = MockContextProvider { state }; let planner = SqlToRel::new_with_options(&context, options); let result = DFParser::parse_sql_with_dialect(sql, dialect); let mut ast = result?; From ffdc61d33501515efceb05b022e852c289d0782a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 12 Aug 2024 12:56:37 -0400 Subject: [PATCH 278/357] Update labeler.yml to match crates (#11937) --- .github/workflows/dev_pr/labeler.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 308abd1688a6d..4e44e47f59689 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -33,16 +33,37 @@ logical-expr: physical-expr: - changed-files: - - any-glob-to-any-file: ['datafusion/physical-expr/**/*'] + - any-glob-to-any-file: ['datafusion/physical-expr/**/*', 'datafusion/physical-expr-common/**/*', 'datafusion/physical-expr-aggregate/**/*', 'datafusion/physical-plan/**/*'] + +catalog: + - changed-files: + - any-glob-to-any-file: ['datafusion/catalog/**/*'] + +common: + - changed-files: + - any-glob-to-any-file: ['datafusion/common/**/*', 'datafusion/common-runtime/**/*'] + +execution: + - changed-files: + - any-glob-to-any-file: ['datafusion/execution/**/*'] + +functions: + - changed-files: + - any-glob-to-any-file: ['datafusion/functions/**/*', 'datafusion/functions-aggregate/**/*', 'datafusion/functions-aggregate-common', 'datafusion/functions-nested'] + optimizer: - changed-files: - - any-glob-to-any-file: ['datafusion/optimizer/**/*'] + - any-glob-to-any-file: ['datafusion/optimizer/**/*', 'datafusion/physical-optimizer/**/*'] core: - changed-files: - any-glob-to-any-file: ['datafusion/core/**/*'] +proto: + - changed-files: + - any-glob-to-any-file: ['datafusion/proto/**/*', 'datafusion/proto-common/**/*'] + substrait: - changed-files: - any-glob-to-any-file: ['datafusion/substrait/**/*'] From 140f7cec78febd73d3db537a816badaaf567530a Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Mon, 12 Aug 2024 20:02:50 +0100 Subject: [PATCH 279/357] Support tuples as types (#11896) * support tuples as types * use compare_op_for_nested --------- Co-authored-by: Andrew Lamb --- .../expr-common/src/type_coercion/binary.rs | 28 +++++++++++- .../physical-expr/src/expressions/in_list.rs | 10 +++-- datafusion/sql/src/expr/mod.rs | 20 ++++++++- datafusion/sqllogictest/test_files/struct.slt | 44 +++++++++++++++++-- 4 files changed, 94 insertions(+), 8 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 05e365a0b9885..251ac6cb8c0e2 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -25,7 +25,7 @@ use crate::operator::Operator; use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DataType, Field, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result}; @@ -498,6 +498,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option { + use arrow::datatypes::DataType::*; + match (lhs_type, rhs_type) { + 
(Struct(lhs_fields), Struct(rhs_fields)) => { + if lhs_fields.len() != rhs_fields.len() { + return None; + } + + let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter()) + .map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type())) + .collect::>>()?; + + let fields = types + .into_iter() + .enumerate() + .map(|(i, datatype)| { + Arc::new(Field::new(format!("c{i}"), datatype, true)) + }) + .collect::>(); + Some(Struct(fields.into())) + } + _ => None, + } +} + /// Returns the output type of applying mathematics operations such as /// `+` to arguments of `lhs_type` and `rhs_type`. fn mathematics_numerical_coercion( diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 8a3885030b9df..dfc70551ccf6e 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -28,7 +28,6 @@ use crate::PhysicalExpr; use arrow::array::*; use arrow::buffer::BooleanBuffer; use arrow::compute::kernels::boolean::{not, or_kleene}; -use arrow::compute::kernels::cmp::eq; use arrow::compute::take; use arrow::datatypes::*; use arrow::util::bit_iterator::BitIndexIterator; @@ -41,7 +40,8 @@ use datafusion_common::hash_utils::HashValue; use datafusion_common::{ exec_err, internal_err, not_impl_err, DFSchema, Result, ScalarValue, }; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, Operator}; +use datafusion_physical_expr_common::datum::compare_op_for_nested; use ahash::RandomState; use hashbrown::hash_map::RawEntryMut; @@ -361,7 +361,11 @@ impl PhysicalExpr for InListExpr { |result, expr| -> Result { Ok(or_kleene( &result, - &eq(&value, &expr?.into_array(num_rows)?)?, + &compare_op_for_nested( + Operator::Eq, + &value, + &expr?.into_array(num_rows)?, + )?, )?) 
}, )?; diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index edb0002842a8f..f2b4e0b4e43d5 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -661,6 +661,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } + SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } @@ -670,7 +671,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &self, schema: &DFSchema, planner_context: &mut PlannerContext, - values: Vec, + values: Vec, fields: Vec, ) -> Result { if !fields.is_empty() { @@ -695,6 +696,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}") } + fn parse_tuple( + &self, + schema: &DFSchema, + planner_context: &mut PlannerContext, + values: Vec, + ) -> Result { + match values.first() { + Some(SQLExpr::Identifier(_)) | Some(SQLExpr::Value(_)) => { + self.parse_struct(schema, planner_context, values, vec![]) + } + None => not_impl_err!("Empty tuple not supported yet"), + _ => { + not_impl_err!("Only identifiers and literals are supported in tuples") + } + } + } + fn sql_position_to_expr( &self, substr_expr: SQLExpr, diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index caa612f556fed..5c66bca1e0c2f 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -218,9 +218,6 @@ select named_struct('field_a', 1, 'field_b', 2); ---- {field_a: 1, field_b: 2} -statement ok -drop table values; - query T select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); ---- @@ -236,3 +233,44 @@ query ? 
select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}}; ---- {animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}} + +# test tuple as struct +query B +select ('x', 'y') = ('x', 'y'); +---- +true + +query B +select ('x', 'y') = ('y', 'x'); +---- +false + +query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation Struct.* +select ('x', 'y') = ('x', 'y', 'z'); + +query B +select ('x', 'y') IN (('x', 'y')); +---- +true + +query B +select ('x', 'y') IN (('x', 'y'), ('y', 'x')); +---- +true + +query I +select a from values where (a, c) = (1, 'a'); +---- +1 + +query I +select a from values where (a, c) IN ((1, 'a'), (2, 'b')); +---- +1 +2 + +statement ok +drop table values; + +statement ok +drop table struct_values; From 00ef8204707b158c0e086506bd7d9d9dd3be5a6f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 12 Aug 2024 17:34:11 -0400 Subject: [PATCH 280/357] Support `convert_to_state` for `AVG` accumulator (#11734) * Support `convert_to_state` for `AVG` accumulator * Update datafusion/physical-expr-common/src/aggregate/groups_accumulator/nulls.rs * fix documentation * Fix after merge * fix for change in location --- .../src/aggregate/groups_accumulator.rs | 1 + .../src/aggregate/groups_accumulator/nulls.rs | 93 +++++++++++++++++++ datafusion/functions-aggregate/src/average.rs | 32 ++++++- .../test_files/aggregate_skip_partial.slt | 29 ++++++ 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 644221edd04db..3984b02c5fbbb 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -20,6 +20,7 @@ pub mod accumulate; pub mod bool_op; +pub mod nulls; pub mod prim_op; use arrow::{ diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs new file mode 100644 index 0000000000000..25212f7f0f5ff --- /dev/null +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`set_nulls`], and [`filtered_null_mask`], utilities for working with nulls + +use arrow::array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray}; +use arrow::buffer::NullBuffer; + +/// Sets the validity mask for a `PrimitiveArray` to `nulls` +/// replacing any existing null mask +pub fn set_nulls( + array: PrimitiveArray, + nulls: Option, +) -> PrimitiveArray { + let (dt, values, _old_nulls) = array.into_parts(); + PrimitiveArray::::new(values, nulls).with_data_type(dt) +} + +/// Converts a `BooleanBuffer` representing a filter to a `NullBuffer. +/// +/// The `NullBuffer` is +/// * `true` (representing valid) for values that were `true` in filter +/// * `false` (representing null) for values that were `false` or `null` in filter +fn filter_to_nulls(filter: &BooleanArray) -> Option { + let (filter_bools, filter_nulls) = filter.clone().into_parts(); + let filter_bools = NullBuffer::from(filter_bools); + NullBuffer::union(Some(&filter_bools), filter_nulls.as_ref()) +} + +/// Compute an output validity mask for an array that has been filtered +/// +/// This can be used to compute nulls for the output of +/// [`GroupsAccumulator::convert_to_state`], which quickly applies an optional +/// filter to the input rows by setting any filtered rows to NULL in the output. +/// Subsequent applications of aggregate functions that ignore NULLs (most of +/// them) will thus ignore the filtered rows as well. 
+/// +/// # Output element is `true` (and thus output is non-null) +/// +/// A `true` in the output represents non null output for all values that were *both*: +/// +/// * `true` in any `opt_filter` (aka values that passed the filter) +/// +/// * `non null` in `input` +/// +/// # Output element is `false` (and thus output is null) +/// +/// A `false` in the output represents an input that was *either*: +/// +/// * `null` +/// +/// * filtered (aka the value was `false` or `null` in the filter) +/// +/// # Example +/// +/// ```text +/// ┌─────┐ ┌─────┐ ┌─────┐ +/// │true │ │NULL │ │false│ +/// │true │ │ │true │ │true │ +/// │true │ ───┼─── │false│ ────────▶ │false│ filtered_nulls +/// │false│ │ │NULL │ │false│ +/// │false│ │true │ │false│ +/// └─────┘ └─────┘ └─────┘ +/// array opt_filter output +/// .nulls() +/// +/// false = NULL true = pass false = NULL Meanings +/// true = valid false = filter true = valid +/// NULL = filter +/// ``` +/// +/// [`GroupsAccumulator::convert_to_state`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator +pub fn filtered_null_mask( + opt_filter: Option<&BooleanArray>, + input: &dyn Array, +) -> Option { + let opt_filter = opt_filter.and_then(filter_to_nulls); + NullBuffer::union(opt_filter.as_ref(), input.nulls()) +} diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index 1be3cd6b07146..ddad76a8734b0 100644 --- a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -19,8 +19,9 @@ use arrow::array::{ self, Array, ArrayRef, ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, - AsArray, PrimitiveArray, PrimitiveBuilder, UInt64Array, + AsArray, BooleanArray, PrimitiveArray, PrimitiveBuilder, UInt64Array, }; + use arrow::compute::sum; use arrow::datatypes::{ i256, ArrowNativeType, DataType, Decimal128Type, Decimal256Type, DecimalType, Field, @@ -34,7 +35,12 @@ use datafusion_expr::Volatility::Immutable; use datafusion_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; + use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::{ + filtered_null_mask, set_nulls, +}; + use datafusion_functions_aggregate_common::utils::DecimalAverager; use log::debug; use std::any::Any; @@ -551,6 +557,30 @@ where Ok(()) } + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + let sums = values[0] + .as_primitive::() + .clone() + .with_data_type(self.sum_data_type.clone()); + let counts = UInt64Array::from_value(1, sums.len()); + + let nulls = filtered_null_mask(opt_filter, &sums); + + // set nulls on the arrays + let counts = set_nulls(counts, nulls.clone()); + let sums = set_nulls(sums, nulls); + + Ok(vec![Arc::new(counts) as ArrayRef, Arc::new(sums)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + fn size(&self) -> usize { self.counts.capacity() * std::mem::size_of::() + self.sums.capacity() * std::mem::size_of::() diff --git a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt index 6c0cf5f800d89..ba378f4230f89 100644 --- a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt +++ b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt @@ -209,6 +209,21 @@ SELECT c2, sum(c3), sum(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; 4 
29 9.531112968922 5 -194 7.074412226677 +# Test avg for tinyint / float +query TRR +SELECT + c1, + avg(c2), + avg(c11) +FROM aggregate_test_100 GROUP BY c1 ORDER BY c1; +---- +a 2.857142857143 0.438223421574 +b 3.263157894737 0.496481208425 +c 2.666666666667 0.425241138254 +d 2.444444444444 0.541519476308 +e 3 0.505440263521 + + # Enabling PG dialect for filtered aggregates tests statement ok set datafusion.sql_parser.dialect = 'Postgres'; @@ -267,6 +282,20 @@ FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; 4 11 14 5 8 7 +# Test avg for tinyint / float +query TRR +SELECT + c1, + avg(c2) FILTER (WHERE c2 != 5), + avg(c11) FILTER (WHERE c2 != 5) +FROM aggregate_test_100 GROUP BY c1 ORDER BY c1; +---- +a 2.5 0.449071887467 +b 2.642857142857 0.445486298629 +c 2.421052631579 0.422882117723 +d 2.125 0.518706191331 +e 2.789473684211 0.536785323369 + # Test count with nullable fields and nullable filter query III SELECT c2, From 7a1a23d5f50cd113613cf54ee5346f0e83d1edc4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 12 Aug 2024 15:57:40 -0600 Subject: [PATCH 281/357] minor: Update release documentation based on 41.0.0 release (#11947) * fix release issues * remove out of date dependency diagram --- datafusion/catalog/Cargo.toml | 1 + dev/release/README.md | 17 +- dev/release/crate-deps.dot | 91 ------- dev/release/crate-deps.svg | 445 ---------------------------------- dev/release/release-crates.sh | 45 ---- 5 files changed, 4 insertions(+), 595 deletions(-) delete mode 100644 dev/release/crate-deps.dot delete mode 100644 dev/release/crate-deps.svg delete mode 100644 dev/release/release-crates.sh diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index 2ebca511c5c87..ff28d8e0c64a6 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -17,6 +17,7 @@ [package] name = "datafusion-catalog" +description = "datafusion-catalog" authors.workspace = true edition.workspace = true homepage.workspace = true diff --git a/dev/release/README.md b/dev/release/README.md index 1817b3002578a..397369a41aa33 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -256,20 +256,7 @@ to all of the DataFusion crates. Download and unpack the official release tarball Verify that the Cargo.toml in the tarball contains the correct version -(e.g. `version = "38.0.0"`) and then publish the crates by running the script `release-crates.sh` -in a directory extracted from the source tarball that was voted on. Note that this script doesn't -work if run in a Git repo. - -Alternatively the crates can be published one at a time with the following commands. Crates need to be -published in the correct order as shown in this diagram. - -![](crate-deps.svg) - -_To update this diagram, manually edit the dependencies in [crate-deps.dot](crate-deps.dot) and then run:_ - -```shell -dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg -``` +(e.g. 
`version = "38.0.0"`) and then publish the crates by running the following commands ```shell (cd datafusion/common && cargo publish) @@ -283,7 +270,9 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg (cd datafusion/sql && cargo publish) (cd datafusion/optimizer && cargo publish) (cd datafusion/common-runtime && cargo publish) +(cd datafusion/catalog && cargo publish) (cd datafusion/physical-plan && cargo publish) +(cd datafusion/physical-optimizer && cargo publish) (cd datafusion/core && cargo publish) (cd datafusion/proto-common && cargo publish) (cd datafusion/proto && cargo publish) diff --git a/dev/release/crate-deps.dot b/dev/release/crate-deps.dot deleted file mode 100644 index 1d903a56021d1..0000000000000 --- a/dev/release/crate-deps.dot +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -digraph G { - datafusion_examples - datafusion_examples -> datafusion - datafusion_examples -> datafusion_common - datafusion_examples -> datafusion_expr - datafusion_examples -> datafusion_optimizer - datafusion_examples -> datafusion_physical_expr - datafusion_examples -> datafusion_sql - datafusion_expr - datafusion_expr -> datafusion_common - datafusion_functions - datafusion_functions -> datafusion_common - datafusion_functions -> datafusion_execution - datafusion_functions -> datafusion_expr - datafusion_wasmtest - datafusion_wasmtest -> datafusion - datafusion_wasmtest -> datafusion_common - datafusion_wasmtest -> datafusion_execution - datafusion_wasmtest -> datafusion_expr - datafusion_wasmtest -> datafusion_optimizer - datafusion_wasmtest -> datafusion_physical_expr - datafusion_wasmtest -> datafusion_physical_plan - datafusion_wasmtest -> datafusion_sql - datafusion_common - datafusion_sql - datafusion_sql -> datafusion_common - datafusion_sql -> datafusion_expr - datafusion_physical_plan - datafusion_physical_plan -> datafusion_common - datafusion_physical_plan -> datafusion_execution - datafusion_physical_plan -> datafusion_expr - datafusion_physical_plan -> datafusion_physical_expr - datafusion_benchmarks - datafusion_benchmarks -> datafusion - datafusion_benchmarks -> datafusion_common - datafusion_benchmarks -> datafusion_proto - datafusion_docs_tests - datafusion_docs_tests -> datafusion - datafusion_optimizer - datafusion_optimizer -> datafusion_common - datafusion_optimizer -> datafusion_expr - datafusion_optimizer -> datafusion_physical_expr - datafusion_optimizer -> datafusion_sql - datafusion_proto - datafusion_proto -> datafusion - datafusion_proto -> datafusion_common - datafusion_proto -> datafusion_expr - datafusion_physical_expr - datafusion_physical_expr -> datafusion_common - datafusion_physical_expr -> datafusion_execution - datafusion_physical_expr -> datafusion_expr - 
datafusion_sqllogictest - datafusion_sqllogictest -> datafusion - datafusion_sqllogictest -> datafusion_common - datafusion - datafusion -> datafusion_common - datafusion -> datafusion_execution - datafusion -> datafusion_expr - datafusion -> datafusion_functions - datafusion -> datafusion_functions_nested - datafusion -> datafusion_optimizer - datafusion -> datafusion_physical_expr - datafusion -> datafusion_physical_plan - datafusion -> datafusion_sql - datafusion_functions_nested - datafusion_functions_nested -> datafusion_common - datafusion_functions_nested -> datafusion_execution - datafusion_functions_nested -> datafusion_expr - datafusion_execution - datafusion_execution -> datafusion_common - datafusion_execution -> datafusion_expr - datafusion_substrait - datafusion_substrait -> datafusion -} \ No newline at end of file diff --git a/dev/release/crate-deps.svg b/dev/release/crate-deps.svg deleted file mode 100644 index c76fe3abb4acd..0000000000000 --- a/dev/release/crate-deps.svg +++ /dev/null @@ -1,445 +0,0 @@ - - - - - - -G - - - -datafusion_examples - -datafusion_examples - - - -datafusion - -datafusion - - - -datafusion_examples->datafusion - - - - - -datafusion_common - -datafusion_common - - - -datafusion_examples->datafusion_common - - - - - -datafusion_expr - -datafusion_expr - - - -datafusion_examples->datafusion_expr - - - - - -datafusion_optimizer - -datafusion_optimizer - - - -datafusion_examples->datafusion_optimizer - - - - - -datafusion_physical_expr - -datafusion_physical_expr - - - -datafusion_examples->datafusion_physical_expr - - - - - -datafusion_sql - -datafusion_sql - - - -datafusion_examples->datafusion_sql - - - - - -datafusion->datafusion_common - - - - - -datafusion->datafusion_expr - - - - - -datafusion->datafusion_optimizer - - - - - -datafusion->datafusion_physical_expr - - - - - -datafusion->datafusion_sql - - - - - -datafusion_functions - -datafusion_functions - - - -datafusion->datafusion_functions - - - - - -datafusion_execution - -datafusion_execution - - - -datafusion->datafusion_execution - - - - - -datafusion_physical_plan - -datafusion_physical_plan - - - -datafusion->datafusion_physical_plan - - - - - -datafusion_functions_nested - -datafusion_functions_nested - - - -datafusion->datafusion_functions_nested - - - - - -datafusion_expr->datafusion_common - - - - - -datafusion_optimizer->datafusion_common - - - - - -datafusion_optimizer->datafusion_expr - - - - - -datafusion_optimizer->datafusion_physical_expr - - - - - -datafusion_optimizer->datafusion_sql - - - - - -datafusion_physical_expr->datafusion_common - - - - - -datafusion_physical_expr->datafusion_expr - - - - - -datafusion_physical_expr->datafusion_execution - - - - - -datafusion_sql->datafusion_common - - - - - -datafusion_sql->datafusion_expr - - - - - -datafusion_functions->datafusion_common - - - - - -datafusion_functions->datafusion_expr - - - - - -datafusion_functions->datafusion_execution - - - - - -datafusion_execution->datafusion_common - - - - - -datafusion_execution->datafusion_expr - - - - - -datafusion_wasmtest - -datafusion_wasmtest - - - -datafusion_wasmtest->datafusion - - - - - -datafusion_wasmtest->datafusion_common - - - - - -datafusion_wasmtest->datafusion_expr - - - - - -datafusion_wasmtest->datafusion_optimizer - - - - - -datafusion_wasmtest->datafusion_physical_expr - - - - - -datafusion_wasmtest->datafusion_sql - - - - - -datafusion_wasmtest->datafusion_execution - - - - - -datafusion_wasmtest->datafusion_physical_plan - - - - - 
-datafusion_physical_plan->datafusion_common - - - - - -datafusion_physical_plan->datafusion_expr - - - - - -datafusion_physical_plan->datafusion_physical_expr - - - - - -datafusion_physical_plan->datafusion_execution - - - - - -datafusion_benchmarks - -datafusion_benchmarks - - - -datafusion_benchmarks->datafusion - - - - - -datafusion_benchmarks->datafusion_common - - - - - -datafusion_proto - -datafusion_proto - - - -datafusion_benchmarks->datafusion_proto - - - - - -datafusion_proto->datafusion - - - - - -datafusion_proto->datafusion_common - - - - - -datafusion_proto->datafusion_expr - - - - - -datafusion_docs_tests - -datafusion_docs_tests - - - -datafusion_docs_tests->datafusion - - - - - -datafusion_sqllogictest - -datafusion_sqllogictest - - - -datafusion_sqllogictest->datafusion - - - - - -datafusion_sqllogictest->datafusion_common - - - - - -datafusion_functions_nested->datafusion_common - - - - - -datafusion_functions_nested->datafusion_expr - - - - - -datafusion_functions_nested->datafusion_execution - - - - - -datafusion_substrait - -datafusion_substrait - - - -datafusion_substrait->datafusion - - - - - diff --git a/dev/release/release-crates.sh b/dev/release/release-crates.sh deleted file mode 100644 index b9bda68b780b5..0000000000000 --- a/dev/release/release-crates.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# This script publishes datafusion crates to crates.io. -# -# This script should only be run after the release has been approved -# by the Apache DataFusion PMC committee. -# -# See release/README.md for full release instructions - -set -eu - -# Do not run inside a git repo -if ! 
[ git rev-parse --is-inside-work-tree ]; then - cd datafusion/common && cargo publish - cd datafusion/expr && cargo publish - cd datafusion/sql && cargo publish - cd datafusion/physical-expr && cargo publish - cd datafusion/optimizer && cargo publish - cd datafusion/core && cargo publish - cd datafusion/proto && cargo publish - cd datafusion/execution && cargo publish - cd datafusion/substrait && cargo publish - cd datafusion-cli && cargo publish --no-verify -else - echo "Crates must be released from the source tarball that was voted on, not from the repo" - exit 1 -fi From 5d3cda530660f52fbcfe66baae25f07924cdc0e0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 Aug 2024 08:21:27 -0400 Subject: [PATCH 282/357] Make `Precision` copy to make it clear clones are not expensive (#11828) * Minor: make it clearer that clone() is not slow * Make Precision Copy when T is Copy --- datafusion/common/src/stats.rs | 23 +++++++++++++--- .../physical_plan/file_scan_config.rs | 2 +- datafusion/core/src/datasource/statistics.rs | 26 +++++++++---------- datafusion/physical-expr/src/analysis.rs | 2 +- datafusion/physical-plan/src/filter.rs | 2 +- datafusion/physical-plan/src/joins/utils.rs | 10 +++---- 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index d6b5310581d72..50ae4e3ca71f7 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -25,7 +25,7 @@ use arrow_schema::{Schema, SchemaRef}; /// Represents a value with a degree of certainty. `Precision` is used to /// propagate information the precision of statistical values. -#[derive(Clone, PartialEq, Eq, Default)] +#[derive(Clone, PartialEq, Eq, Default, Copy)] pub enum Precision { /// The exact value is known Exact(T), @@ -503,9 +503,9 @@ mod tests { let inexact_precision = Precision::Inexact(42); let absent_precision = Precision::::Absent; - assert_eq!(exact_precision.clone().to_inexact(), inexact_precision); - assert_eq!(inexact_precision.clone().to_inexact(), inexact_precision); - assert_eq!(absent_precision.clone().to_inexact(), absent_precision); + assert_eq!(exact_precision.to_inexact(), inexact_precision); + assert_eq!(inexact_precision.to_inexact(), inexact_precision); + assert_eq!(absent_precision.to_inexact(), absent_precision); } #[test] @@ -545,4 +545,19 @@ mod tests { assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15)); assert_eq!(precision1.multiply(&absent_precision), Precision::Absent); } + + #[test] + fn test_precision_cloning() { + // Precision is copy + let precision: Precision = Precision::Exact(42); + let p2 = precision; + assert_eq!(precision, p2); + + // Precision is not copy (requires .clone()) + let precision: Precision = + Precision::Exact(ScalarValue::Int64(Some(42))); + // Clippy would complain about this if it were Copy + let p2 = precision.clone(); + assert_eq!(precision, p2); + } } diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 17850ea7585aa..34fb6226c1a26 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -244,7 +244,7 @@ impl FileScanConfig { } let table_stats = Statistics { - num_rows: self.statistics.num_rows.clone(), + num_rows: self.statistics.num_rows, // TODO correct byte size? 
total_byte_size: Precision::Absent, column_statistics: table_cols_stats, diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 6697558776802..6f89657defd3c 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -18,17 +18,18 @@ use std::mem; use std::sync::Arc; -use super::listing::PartitionedFile; -use crate::arrow::datatypes::{Schema, SchemaRef}; -use crate::error::Result; -use crate::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; -use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; use arrow_schema::DataType; +use futures::{Stream, StreamExt}; use datafusion_common::stats::Precision; use datafusion_common::ScalarValue; -use futures::{Stream, StreamExt}; +use crate::arrow::datatypes::{Schema, SchemaRef}; +use crate::error::Result; +use crate::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; +use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; + +use super::listing::PartitionedFile; /// Get all files as well as the file level summary statistics (no statistic for partition columns). /// If the optional `limit` is provided, includes only sufficient files. Needed to read up to @@ -62,8 +63,8 @@ pub async fn get_statistics_with_limit( result_files.push(file); // First file, we set them directly from the file statistics. - num_rows = file_stats.num_rows.clone(); - total_byte_size = file_stats.total_byte_size.clone(); + num_rows = file_stats.num_rows; + total_byte_size = file_stats.total_byte_size; for (index, file_column) in file_stats.column_statistics.clone().into_iter().enumerate() { @@ -93,10 +94,10 @@ pub async fn get_statistics_with_limit( // counts across all the files in question. If any file does not // provide any information or provides an inexact value, we demote // the statistic precision to inexact. 
- num_rows = add_row_stats(file_stats.num_rows.clone(), num_rows); + num_rows = add_row_stats(file_stats.num_rows, num_rows); total_byte_size = - add_row_stats(file_stats.total_byte_size.clone(), total_byte_size); + add_row_stats(file_stats.total_byte_size, total_byte_size); for (file_col_stats, col_stats) in file_stats .column_statistics @@ -110,8 +111,7 @@ pub async fn get_statistics_with_limit( distinct_count: _, } = file_col_stats; - col_stats.null_count = - add_row_stats(file_nc.clone(), col_stats.null_count.clone()); + col_stats.null_count = add_row_stats(*file_nc, col_stats.null_count); set_max_if_greater(file_max, &mut col_stats.max_value); set_min_if_lesser(file_min, &mut col_stats.min_value) } @@ -192,7 +192,7 @@ pub(crate) fn get_col_stats( None => None, }; ColumnStatistics { - null_count: null_counts[i].clone(), + null_count: null_counts[i], max_value: max_value.map(Precision::Exact).unwrap_or(Precision::Absent), min_value: min_value.map(Precision::Exact).unwrap_or(Precision::Absent), distinct_count: Precision::Absent, diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index bcf1c8e510b18..3eac62a4df089 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -119,7 +119,7 @@ impl ExprBoundaries { Ok(ExprBoundaries { column, interval, - distinct_count: col_stats.distinct_count.clone(), + distinct_count: col_stats.distinct_count, }) } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index fa9108057cfe3..568987b147980 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -324,7 +324,7 @@ fn collect_new_statistics( (Precision::Inexact(lower), Precision::Inexact(upper)) }; ColumnStatistics { - null_count: input_column_stats[idx].null_count.clone().to_inexact(), + null_count: input_column_stats[idx].null_count.to_inexact(), max_value, min_value, distinct_count: distinct_count.to_inexact(), diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index b8a58e4d0d302..80d8815bdebc3 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -827,12 +827,12 @@ fn estimate_join_cardinality( JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { let ij_cardinality = estimate_inner_join_cardinality( Statistics { - num_rows: left_stats.num_rows.clone(), + num_rows: left_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: left_col_stats, }, Statistics { - num_rows: right_stats.num_rows.clone(), + num_rows: right_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: right_col_stats, }, @@ -1024,7 +1024,7 @@ fn max_distinct_count( stats: &ColumnStatistics, ) -> Precision { match &stats.distinct_count { - dc @ (Precision::Exact(_) | Precision::Inexact(_)) => dc.clone(), + &dc @ (Precision::Exact(_) | Precision::Inexact(_)) => dc, _ => { // The number can never be greater than the number of rows we have // minus the nulls (since they don't count as distinct values). 
@@ -2054,9 +2054,7 @@ mod tests { ); assert_eq!( partial_join_stats.map(|s| s.column_statistics), - expected_cardinality - .clone() - .map(|_| [left_col_stats, right_col_stats].concat()) + expected_cardinality.map(|_| [left_col_stats, right_col_stats].concat()) ); } Ok(()) From 1d86724a05fa9b926af1d66fac5e1418a01f0cd1 Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Tue, 13 Aug 2024 06:43:30 -0600 Subject: [PATCH 283/357] fix: Fix various complaints from the latest nightly clippy (#11958) * fix: Fix various complaints from the latest nightly clippy * fix: run fmt for ci :/ * fix: Update cli lockfile since that's what ci wants --- datafusion-cli/Cargo.lock | 68 ++++++++++--------- datafusion/common/Cargo.toml | 1 + datafusion/common/src/error.rs | 67 ++++++++---------- datafusion/common/src/lib.rs | 12 ++++ .../physical_plan/parquet/row_filter.rs | 11 +-- datafusion/functions/src/datetime/common.rs | 60 +++++++--------- datafusion/functions/src/datetime/to_date.rs | 4 +- datafusion/physical-expr/benches/case_when.rs | 4 +- .../physical-expr/src/expressions/case.rs | 2 +- datafusion/sql/src/parser.rs | 4 +- datafusion/sql/src/unparser/plan.rs | 18 ++--- 11 files changed, 116 insertions(+), 135 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 134cde8976d67..90995c1d116ae 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -347,13 +347,14 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -386,7 +387,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -874,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.8" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" +checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" dependencies = [ "jobserver", "libc", @@ -1022,9 +1023,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core2" @@ -1037,9 +1038,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" dependencies = [ "libc", ] @@ -1103,7 +1104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1240,6 +1241,7 @@ dependencies = [ "num_cpus", "object_store", "parquet", + "paste", "sqlparser", ] @@ -1762,7 
+1764,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2441,9 +2443,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", @@ -2785,7 +2787,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3386,29 +3388,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.124" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" dependencies = [ "itoa", "memchr", @@ -3537,7 +3539,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3583,7 +3585,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3596,7 +3598,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3618,9 +3620,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -3684,7 +3686,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3778,7 +3780,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3875,7 +3877,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3920,7 +3922,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -4074,7 +4076,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + 
"syn 2.0.74", "wasm-bindgen-shared", ] @@ -4108,7 +4110,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4383,7 +4385,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 85dfb2e8f73ab..8435d0632576c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -60,6 +60,7 @@ libc = "0.2.140" num_cpus = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } +paste = "1.0.15" pyo3 = { version = "0.21.0", optional = true } sqlparser = { workspace = true } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index f62acaf0493b8..a5c2b3e55bc7b 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -481,13 +481,6 @@ macro_rules! unwrap_or_internal_err { }; } -macro_rules! with_dollar_sign { - ($($body:tt)*) => { - macro_rules! __with_dollar_sign { $($body)* } - __with_dollar_sign!($); - } -} - /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -501,37 +494,37 @@ macro_rules! with_dollar_sign { /// `NAME_DF_ERR` - macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity /// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc macro_rules! make_error { - ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { - with_dollar_sign! { - ($d:tt) => { - /// Macro wraps `$ERR` to add backtrace feature - #[macro_export] - macro_rules! $NAME_DF_ERR { - ($d($d args:expr),*) => { - $crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - ) - } + ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); }; + (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => { + ::paste::paste!{ + /// Macro wraps `$ERR` to add backtrace feature + #[macro_export] + macro_rules! $NAME_DF_ERR { + ($d($d args:expr),*) => { + $crate::DataFusionError::$ERR( + ::std::format!( + "{}{}", + ::std::format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + ) } + } - /// Macro wraps Err(`$ERR`) to add backtrace feature - #[macro_export] - macro_rules! $NAME_ERR { - ($d($d args:expr),*) => { - Err($crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - )) - } + /// Macro wraps Err(`$ERR`) to add backtrace feature + #[macro_export] + macro_rules! $NAME_ERR { + ($d($d args:expr),*) => { + Err($crate::[<_ $NAME_DF_ERR>]!($d($d args),*)) } } + + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_ERR as [<_ $NAME_ERR>]; + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>]; } }; } @@ -613,12 +606,6 @@ macro_rules! 
schema_err { // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths -pub use config_err as _config_err; -pub use internal_datafusion_err as _internal_datafusion_err; -pub use internal_err as _internal_err; -pub use not_impl_err as _not_impl_err; -pub use plan_datafusion_err as _plan_datafusion_err; -pub use plan_err as _plan_err; pub use schema_err as _schema_err; /// Create a "field not found" DataFusion::SchemaError diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 8cd64e7d16a26..19af889e426a2 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -73,6 +73,18 @@ pub use table_reference::{ResolvedTableReference, TableReference}; pub use unnest::UnnestOptions; pub use utils::project_schema; +// These are hidden from docs purely to avoid polluting the public view of what this crate exports. +// These are just re-exports of macros by the same name, which gets around the 'cannot refer to +// macro-expanded macro_export macros by their full path' error. +// The design to get around this comes from this comment: +// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997 +#[doc(hidden)] +pub use error::{ + _config_datafusion_err, _exec_datafusion_err, _internal_datafusion_err, + _not_impl_datafusion_err, _plan_datafusion_err, _resources_datafusion_err, + _substrait_datafusion_err, +}; + /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is /// not possible. In normal usage of DataFusion the downcast should always succeed. /// diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index f9cce5f783ff5..9de132169389c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -341,14 +341,9 @@ pub fn build_row_filter( let mut candidates: Vec = predicates .into_iter() .flat_map(|expr| { - if let Ok(candidate) = - FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) - .build(metadata) - { - candidate - } else { - None - } + FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) + .build(metadata) + .unwrap_or_default() }) .collect(); diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 4f48ab188403e..6048eeeaa554e 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -28,7 +28,9 @@ use chrono::{DateTime, TimeZone, Utc}; use itertools::Either; use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarType, ScalarValue}; +use datafusion_common::{ + exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType, ScalarValue, +}; use datafusion_expr::ColumnarValue; /// Error message if nanosecond conversion request beyond supported interval @@ -227,46 +229,34 @@ where // if the first argument is a scalar utf8 all arguments are expected to be scalar utf8 ColumnarValue::Scalar(scalar) => match scalar { ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => { - let mut val: Option> = None; - let mut err: Option = None; + let a = a.as_ref(); + // ASK: Why do we trust `a` to be non-null at this point? 
+ let a = unwrap_or_internal_err!(a); - match a { - Some(a) => { - // enumerate all the values finding the first one that returns an Ok result - for (pos, v) in args.iter().enumerate().skip(1) { - if let ColumnarValue::Scalar(s) = v { - if let ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x) = - s - { - if let Some(s) = x { - match op(a.as_str(), s.as_str()) { - Ok(r) => { - val = Some(Ok(ColumnarValue::Scalar( - S::scalar(Some(op2(r))), - ))); - break; - } - Err(e) => { - err = Some(e); - } - } - } - } else { - return exec_err!("Unsupported data type {s:?} for function {name}, arg # {pos}"); - } - } else { - return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + let mut ret = None; + + for (pos, v) in args.iter().enumerate().skip(1) { + let ColumnarValue::Scalar( + ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x), + ) = v + else { + return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + }; + + if let Some(s) = x { + match op(a.as_str(), s.as_str()) { + Ok(r) => { + ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( + op2(r), + ))))); + break; } + Err(e) => ret = Some(Err(e)), } } - None => (), } - if let Some(v) = val { - v - } else { - Err(err.unwrap()) - } + unwrap_or_internal_err!(ret) } other => { exec_err!("Unsupported data type {other:?} for function {name}") diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index e491c0b555083..cc5ffa73c8f17 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -58,7 +58,7 @@ impl ToDateFunc { }, "to_date", ), - n if n >= 2 => handle_multiple::( + 2.. => handle_multiple::( args, |s, format| { string_to_timestamp_nanos_formatted(s, format) @@ -72,7 +72,7 @@ impl ToDateFunc { |n| n, "to_date", ), - _ => exec_err!("Unsupported 0 argument count for function to_date"), + 0 => exec_err!("Unsupported 0 argument count for function to_date"), } } } diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 8a34f34a82db3..9eda1277c263b 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -44,12 +44,12 @@ fn criterion_benchmark(c: &mut Criterion) { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } if i % 9 == 0 { c3.append_null(); } else { - c3.append_value(&format!("other string {i}")); + c3.append_value(format!("other string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 583a4ef32542d..c6afb5c059854 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -1146,7 +1146,7 @@ mod tests { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 40dd368f9e80b..dcb33aa7b44f7 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -523,9 +523,7 @@ impl<'a> DFParser<'a> { Ok(n) => Ok(Value::Number(n, l)), // The tokenizer should have ensured `n` is an integer // so this should not be possible - Err(e) => parser_err!(format!( - "Unexpected error: could not parse '{n}' as number: {e}" - )), + Err(e) => match e {}, }, _ => 
self.parser.expected("string or numeric value", next_token), } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 277efd5fe7002..024f33fb2c7df 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -359,18 +359,14 @@ impl Unparser<'_> { .iter() .map(|e| self.select_item_to_sql(e)) .collect::>>()?; - match &on.sort_expr { - Some(sort_expr) => { - if let Some(query_ref) = query { - query_ref - .order_by(self.sort_to_sql(sort_expr.clone())?); - } else { - return internal_err!( - "Sort operator only valid in a statement context." - ); - } + if let Some(sort_expr) = &on.sort_expr { + if let Some(query_ref) = query { + query_ref.order_by(self.sort_to_sql(sort_expr.clone())?); + } else { + return internal_err!( + "Sort operator only valid in a statement context." + ); } - None => {} } select.projection(items); (ast::Distinct::On(exprs), on.input.as_ref()) From 8e23cba47920dc6add72a9c0b164e37d406a938d Mon Sep 17 00:00:00 2001 From: Austin Liu Date: Tue, 13 Aug 2024 20:51:08 +0800 Subject: [PATCH 284/357] Minor: fix outdated link (#11964) * Minor: fix outdated link Signed-off-by: Austin Liu * Minor: update corresponding content Signed-off-by: Austin Liu --------- Signed-off-by: Austin Liu --- docs/source/contributor-guide/howtos.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/howtos.md b/docs/source/contributor-guide/howtos.md index 254b1de6521e9..4e52a2fbcaa67 100644 --- a/docs/source/contributor-guide/howtos.md +++ b/docs/source/contributor-guide/howtos.md @@ -24,7 +24,7 @@ Below is a checklist of what you need to do to add a new scalar function to DataFusion: - Add the actual implementation of the function to a new module file within: - - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions-array) for array functions + - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions-nested) for arrays, maps and structs functions - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/crypto) for crypto functions - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/datetime) for datetime functions - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/encoding) for encoding functions From af75f2d51067d8a6b0d3279a8c5cab36d6096e37 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 Aug 2024 14:23:12 -0400 Subject: [PATCH 285/357] Minor: simplify SQL number parsing and add a comment about unused (#11965) * Simplify Number parsing * Add comment explaining unused erorr --- datafusion/common/src/error.rs | 4 ++++ datafusion/sql/src/parser.rs | 7 +------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index a5c2b3e55bc7b..27a25d0c9dd5b 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -519,6 +519,10 @@ macro_rules! make_error { } } + + // Note: Certain macros are used in this crate, but not all. 
+ // This macro generates a use or all of them in case they are needed + // so we allow unused code to avoid warnings when they are not used #[doc(hidden)] #[allow(unused)] pub use $NAME_ERR as [<_ $NAME_ERR>]; diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index dcb33aa7b44f7..2df8d89c59bc8 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -519,12 +519,7 @@ impl<'a> DFParser<'a> { Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)), Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), - Token::Number(ref n, l) => match n.parse() { - Ok(n) => Ok(Value::Number(n, l)), - // The tokenizer should have ensured `n` is an integer - // so this should not be possible - Err(e) => match e {}, - }, + Token::Number(n, l) => Ok(Value::Number(n, l)), _ => self.parser.expected("string or numeric value", next_token), } } From 44b9066ea452f760621e3d8076338abcdfd67de0 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 13 Aug 2024 23:55:16 +0530 Subject: [PATCH 286/357] Support Arrays for the Map scalar functions (#11712) * crude impl to support array * ++improvement * uncomment logic test * working impl * leverage return_type_from_exprs * add documentation * remove unwrap method * add more slt tests * typos * typos * remove extract based on dt * few more tests * move back to return_type * improve error & tests * Update datafusion/functions-nested/src/map.rs Co-authored-by: Alex Huang --------- Co-authored-by: Alex Huang --- datafusion/common/src/utils/mod.rs | 9 +- datafusion/functions-nested/src/map.rs | 141 +++++++++++++++++++-- datafusion/sqllogictest/test_files/map.slt | 55 +++++--- 3 files changed, 180 insertions(+), 25 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 12e306ffaf031..bf506c0551eb6 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -29,8 +29,10 @@ use arrow::compute; use arrow::compute::{partition, SortColumn, SortOptions}; use arrow::datatypes::{Field, SchemaRef, UInt32Type}; use arrow::record_batch::RecordBatch; +use arrow_array::cast::AsArray; use arrow_array::{ - Array, FixedSizeListArray, LargeListArray, ListArray, RecordBatchOptions, + Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, + RecordBatchOptions, }; use arrow_schema::DataType; use sqlparser::ast::Ident; @@ -440,6 +442,11 @@ pub fn arrays_into_list_array( )) } +/// Helper function to convert a ListArray into a vector of ArrayRefs. +pub fn list_to_arrays(a: ArrayRef) -> Vec { + a.as_list::().iter().flatten().collect::>() +} + /// Get the base type of a data type. /// /// Example diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index e218b501dcf16..b6068fdff0d52 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -15,17 +15,20 @@ // specific language governing permissions and limitations // under the License. 
-use crate::make_array::make_array; +use std::any::Any; +use std::collections::VecDeque; +use std::sync::Arc; + use arrow::array::ArrayData; -use arrow_array::{Array, ArrayRef, MapArray, StructArray}; +use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_schema::{DataType, Field, SchemaBuilder}; + use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; -use std::any::Any; -use std::collections::VecDeque; -use std::sync::Arc; + +use crate::make_array::make_array; /// Returns a map created from a key list and a value list pub fn map(keys: Vec, values: Vec) -> Expr { @@ -56,11 +59,11 @@ fn make_map_batch(args: &[ColumnarValue]) -> datafusion_common::Result Ok(array.value(0)), _ => exec_err!("Expected array, got {:?}", value), }, - ColumnarValue::Array(array) => exec_err!("Expected scalar, got {:?}", array), + ColumnarValue::Array(array) => Ok(array.to_owned()), } } @@ -81,6 +84,7 @@ fn make_map_batch_internal( keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool, + data_type: DataType, ) -> datafusion_common::Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); @@ -90,6 +94,14 @@ fn make_map_batch_internal( return exec_err!("map requires key and value lists to have the same length"); } + if !can_evaluate_to_const { + return if let DataType::LargeList(..) = data_type { + make_map_array_internal::(keys, values) + } else { + make_map_array_internal::(keys, values) + }; + } + let key_field = Arc::new(Field::new("key", keys.data_type().clone(), false)); let value_field = Arc::new(Field::new("value", values.data_type().clone(), true)); let mut entry_struct_buffer: VecDeque<(Arc, ArrayRef)> = VecDeque::new(); @@ -190,7 +202,6 @@ impl ScalarUDFImpl for MapFunc { make_map_batch(args) } } - fn get_element_type(data_type: &DataType) -> datafusion_common::Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), @@ -202,3 +213,115 @@ fn get_element_type(data_type: &DataType) -> datafusion_common::Result<&DataType ), } } + +/// Helper function to create MapArray from array of values to support arrays for Map scalar function +/// +/// ``` text +/// Format of input KEYS and VALUES column +/// keys values +/// +---------------------+ +---------------------+ +/// | +-----------------+ | | +-----------------+ | +/// | | [k11, k12, k13] | | | | [v11, v12, v13] | | +/// | +-----------------+ | | +-----------------+ | +/// | | | | +/// | +-----------------+ | | +-----------------+ | +/// | | [k21, k22, k23] | | | | [v21, v22, v23] | | +/// | +-----------------+ | | +-----------------+ | +/// | | | | +/// | +-----------------+ | | +-----------------+ | +/// | |[k31, k32, k33] | | | |[v31, v32, v33] | | +/// | +-----------------+ | | +-----------------+ | +/// +---------------------+ +---------------------+ +/// ``` +/// Flattened keys and values array to user create `StructArray`, +/// which serves as inner child for `MapArray` +/// +/// ``` text +/// Flattened Flattened +/// Keys Values +/// +-----------+ +-----------+ +/// | +-------+ | | +-------+ | +/// | | k11 | | | | v11 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k12 | | | | v12 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k13 | | | | v13 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k21 | | | | v21 | | 
+/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k22 | | | | v22 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k23 | | | | v23 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k31 | | | | v31 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k32 | | | | v32 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k33 | | | | v33 | | +/// | +-------+ | | +-------+ | +/// +-----------+ +-----------+ +/// ```text + +fn make_map_array_internal( + keys: ArrayRef, + values: ArrayRef, +) -> datafusion_common::Result { + let mut offset_buffer = vec![O::zero()]; + let mut running_offset = O::zero(); + + let keys = datafusion_common::utils::list_to_arrays::(keys); + let values = datafusion_common::utils::list_to_arrays::(values); + + let mut key_array_vec = vec![]; + let mut value_array_vec = vec![]; + for (k, v) in keys.iter().zip(values.iter()) { + running_offset = running_offset.add(O::usize_as(k.len())); + offset_buffer.push(running_offset); + key_array_vec.push(k.as_ref()); + value_array_vec.push(v.as_ref()); + } + + // concatenate all the arrays + let flattened_keys = arrow::compute::concat(key_array_vec.as_ref())?; + if flattened_keys.null_count() > 0 { + return exec_err!("keys cannot be null"); + } + let flattened_values = arrow::compute::concat(value_array_vec.as_ref())?; + + let fields = vec![ + Arc::new(Field::new("key", flattened_keys.data_type().clone(), false)), + Arc::new(Field::new( + "value", + flattened_values.data_type().clone(), + true, + )), + ]; + + let struct_data = ArrayData::builder(DataType::Struct(fields.into())) + .len(flattened_keys.len()) + .add_child_data(flattened_keys.to_data()) + .add_child_data(flattened_values.to_data()) + .build()?; + + let map_data = ArrayData::builder(DataType::Map( + Arc::new(Field::new( + "entries", + struct_data.data_type().clone(), + false, + )), + false, + )) + .len(keys.len()) + .add_child_data(struct_data) + .add_buffer(Buffer::from_slice_ref(offset_buffer.as_slice())) + .build()?; + Ok(ColumnarValue::Array(Arc::new(MapArray::from(map_data)))) +} diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index eb350c22bb5d9..0dc37c68bca4d 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -199,25 +199,50 @@ SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), a statement ok create table t as values -('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2]), -('b', 2, 'k3', 30, ['k3'], [3]), -('d', 4, 'k5', 50, ['k5'], [5]); +('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2], 'POST', [[1,2,3]], ['a']), +('b', 2, 'k3', 30, ['k3'], [3], 'PUT', [[4]], ['b']), +('d', 4, 'k5', 50, ['k5'], [5], null, [[1,2]], ['c']); -query error +query ? SELECT make_map(column1, column2, column3, column4) FROM t; -# TODO: support array value -# ---- -# {a: 1, k1: 10} -# {b: 2, k3: 30} -# {d: 4, k5: 50} +---- +{a: 1, k1: 10} +{b: 2, k3: 30} +{d: 4, k5: 50} -query error +query ? SELECT map(column5, column6) FROM t; -# TODO: support array value -# ---- -# {k1:1, k2:2} -# {k3: 3} -# {k5: 5} +---- +{k1: 1, k2: 2} +{k3: 3} +{k5: 5} + +query ? +SELECT map(column8, column9) FROM t; +---- +{[1, 2, 3]: a} +{[4]: b} +{[1, 2]: c} + +query error +SELECT map(column6, column7) FROM t; + +query ? +select Map {column6: column7} from t; +---- +{[1, 2]: POST} +{[3]: PUT} +{[5]: } + +query ? 
+select Map {column8: column7} from t; +---- +{[[1, 2, 3]]: POST} +{[[4]]: PUT} +{[[1, 2]]: } + +query error +select Map {column7: column8} from t; query ? SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count from t; From 508da801b633dfafe969f7e8f8598bc79a878d9d Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 13 Aug 2024 14:25:43 -0400 Subject: [PATCH 287/357] Update LPAD scalar function to support Utf8View (#11941) --- datafusion/functions/src/unicode/lpad.rs | 677 +++++++++++------- .../sqllogictest/test_files/functions.slt | 26 + .../sqllogictest/test_files/string_view.slt | 20 +- 3 files changed, 463 insertions(+), 260 deletions(-) diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index ce5e0064362bf..5caa6acd67453 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -18,16 +18,21 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, Int64Array, + OffsetSizeTrait, StringViewArray, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; use unicode_segmentation::UnicodeSegmentation; +use DataType::{LargeUtf8, Utf8, Utf8View}; -use crate::utils::{make_scalar_function, utf8_to_str_type}; +use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use crate::utils::{make_scalar_function, utf8_to_str_type}; + #[derive(Debug)] pub struct LPadFunc { signature: Signature, @@ -45,11 +50,17 @@ impl LPadFunc { Self { signature: Signature::one_of( vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8View, Int64, Utf8View]), + Exact(vec![Utf8View, Int64, Utf8]), + Exact(vec![Utf8View, Int64, LargeUtf8]), Exact(vec![Utf8, Int64]), - Exact(vec![LargeUtf8, Int64]), + Exact(vec![Utf8, Int64, Utf8View]), Exact(vec![Utf8, Int64, Utf8]), - Exact(vec![LargeUtf8, Int64, Utf8]), Exact(vec![Utf8, Int64, LargeUtf8]), + Exact(vec![LargeUtf8, Int64]), + Exact(vec![LargeUtf8, Int64, Utf8View]), + Exact(vec![LargeUtf8, Int64, Utf8]), Exact(vec![LargeUtf8, Int64, LargeUtf8]), ], Volatility::Immutable, @@ -76,300 +87,450 @@ impl ScalarUDFImpl for LPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(lpad::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(lpad::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function lpad"), - } + make_scalar_function(lpad, vec![])(args) } } -/// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right). +/// Extends the string to length 'length' by prepending the characters fill (a space by default). +/// If the string is already longer than length then it is truncated (on the right). 
/// lpad('hi', 5, 'xy') = 'xyxhi' -pub fn lpad(args: &[ArrayRef]) -> Result { - match args.len() { - 2 => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - - let result = string_array - .iter() - .zip(length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!( - "lpad requested length {length} too large" - ); - } +pub fn lpad(args: &[ArrayRef]) -> Result { + if args.len() <= 1 || args.len() > 3 { + return exec_err!( + "lpad was called with {} arguments. It requires at least 2 and at most 3.", + args.len() + ); + } + + let length_array = as_int64_array(&args[1])?; + + match args[0].data_type() { + Utf8 => match args.len() { + 2 => lpad_impl::<&GenericStringArray, &GenericStringArray, i32>( + args[0].as_string::(), + length_array, + None, + ), + 3 => lpad_with_replace::<&GenericStringArray, i32>( + args[0].as_string::(), + length_array, + &args[2], + ), + _ => unreachable!(), + }, + LargeUtf8 => match args.len() { + 2 => lpad_impl::<&GenericStringArray, &GenericStringArray, i64>( + args[0].as_string::(), + length_array, + None, + ), + 3 => lpad_with_replace::<&GenericStringArray, i64>( + args[0].as_string::(), + length_array, + &args[2], + ), + _ => unreachable!(), + }, + Utf8View => match args.len() { + 2 => lpad_impl::<&StringViewArray, &GenericStringArray, i32>( + args[0].as_string_view(), + length_array, + None, + ), + 3 => lpad_with_replace::<&StringViewArray, i32>( + args[0].as_string_view(), + length_array, + &args[2], + ), + _ => unreachable!(), + }, + other => { + exec_err!("Unsupported data type {other:?} for function lpad") + } + } +} - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) +fn lpad_with_replace<'a, V, T: OffsetSizeTrait>( + string_array: V, + length_array: &Int64Array, + fill_array: &'a ArrayRef, +) -> Result +where + V: StringArrayType<'a>, +{ + match fill_array.data_type() { + Utf8 => lpad_impl::, T>( + string_array, + length_array, + Some(fill_array.as_string::()), + ), + LargeUtf8 => lpad_impl::, T>( + string_array, + length_array, + Some(fill_array.as_string::()), + ), + Utf8View => lpad_impl::( + string_array, + length_array, + Some(fill_array.as_string_view()), + ), + other => { + exec_err!("Unsupported data type {other:?} for function lpad") + } + } +} + +fn lpad_impl<'a, V, V2, T>( + string_array: V, + length_array: &Int64Array, + fill_array: Option, +) -> Result +where + V: StringArrayType<'a>, + V2: StringArrayType<'a>, + T: OffsetSizeTrait, +{ + if fill_array.is_none() { + let result = string_array + .iter() + .zip(length_array.iter()) + .map(|(string, length)| match (string, length) { + (Some(string), Some(length)) => { + if length > i32::MAX as i64 { + return exec_err!("lpad requested length {length} too large"); + } + + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + Ok(Some("".to_string())) + } else { + let graphemes = string.graphemes(true).collect::>(); + if length < graphemes.len() { + Ok(Some(graphemes[..length].concat())) } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s: String = " ".repeat(length - graphemes.len()); - s.push_str(string); - Ok(Some(s)) - } + let mut s: String = " ".repeat(length - graphemes.len()); + s.push_str(string); + Ok(Some(s)) } } - _ => Ok(None), - }) - .collect::>>()?; 
+ } + _ => Ok(None), + }) + .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - 3 => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - let fill_array = as_generic_string_array::(&args[2])?; - - let result = string_array - .iter() - .zip(length_array.iter()) - .zip(fill_array.iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!( - "lpad requested length {length} too large" - ); - } + Ok(Arc::new(result) as ArrayRef) + } else { + let result = string_array + .iter() + .zip(length_array.iter()) + .zip(fill_array.unwrap().iter()) + .map(|((string, length), fill)| match (string, length, fill) { + (Some(string), Some(length), Some(fill)) => { + if length > i32::MAX as i64 { + return exec_err!("lpad requested length {length} too large"); + } + + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + Ok(Some("".to_string())) + } else { + let graphemes = string.graphemes(true).collect::>(); + let fill_chars = fill.chars().collect::>(); - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) + if length < graphemes.len() { + Ok(Some(graphemes[..length].concat())) + } else if fill_chars.is_empty() { + Ok(Some(string.to_string())) } else { - let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); - - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) - } else { - let mut s = string.to_string(); - let mut char_vector = - Vec::::with_capacity(length - graphemes.len()); - for l in 0..length - graphemes.len() { - char_vector.push( - *fill_chars.get(l % fill_chars.len()).unwrap(), - ); - } - s.insert_str( - 0, - char_vector.iter().collect::().as_str(), - ); - Ok(Some(s)) + let mut s = string.to_string(); + let mut char_vector = + Vec::::with_capacity(length - graphemes.len()); + for l in 0..length - graphemes.len() { + char_vector + .push(*fill_chars.get(l % fill_chars.len()).unwrap()); } + s.insert_str( + 0, + char_vector.iter().collect::().as_str(), + ); + Ok(Some(s)) } } - _ => Ok(None), - }) - .collect::>>()?; + } + _ => Ok(None), + }) + .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - other => exec_err!( - "lpad was called with {other} arguments. It requires at least 2 and at most 3." - ), + Ok(Arc::new(result) as ArrayRef) + } +} + +trait StringArrayType<'a>: ArrayAccessor + Sized { + fn iter(&self) -> ArrayIter; +} +impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { + fn iter(&self) -> ArrayIter { + GenericStringArray::::iter(self) + } +} +impl<'a> StringArrayType<'a> for &'a StringViewArray { + fn iter(&self) -> ArrayIter { + StringViewArray::iter(self) } } #[cfg(test)] mod tests { - use arrow::array::{Array, StringArray}; - use arrow::datatypes::DataType::Utf8; + use crate::unicode::lpad::LPadFunc; + use crate::utils::test::test_function; + + use arrow::array::{Array, LargeStringArray, StringArray}; + use arrow::datatypes::DataType::{LargeUtf8, Utf8}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; - use crate::unicode::lpad::LPadFunc; - use crate::utils::test::test_function; + macro_rules! 
test_lpad { + ($INPUT:expr, $LENGTH:expr, $EXPECTED:expr) => { + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)), + ColumnarValue::Scalar($LENGTH) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)), + ColumnarValue::Scalar($LENGTH) + ], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)), + ColumnarValue::Scalar($LENGTH) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + }; + + ($INPUT:expr, $LENGTH:expr, $REPLACE:expr, $EXPECTED:expr) => { + // utf8, utf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + // utf8, largeutf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + // utf8, utf8view + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + + // largeutf8, utf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE)) + ], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + // largeutf8, largeutf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE)) + ], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + // largeutf8, utf8view + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE)) + ], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + + // utf8view, utf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + // utf8view, largeutf8 + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + // utf8view, utf8view + test_function!( + LPadFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)), + ColumnarValue::Scalar($LENGTH), + ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE)) + ], + $EXPECTED, + &str, + Utf8, + StringArray + ); + }; + } #[test] fn test_functions() -> Result<()> { - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("josé")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ], - Ok(Some(" josé")), - &str, - Utf8, - StringArray - ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ], - Ok(Some(" hi")), - &str, - Utf8, - StringArray - ); - 
test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(0i64)), - ], - Ok(Some("")), - &str, - Utf8, - StringArray + test_lpad!( + Some("josé".into()), + ScalarValue::Int64(Some(5i64)), + Ok(Some(" josé")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::Int64(None)), - ], - Ok(None), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(5i64)), + Ok(Some(" hi")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::Utf8(None)), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ], - Ok(None), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(0i64)), + Ok(Some("")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ColumnarValue::Scalar(ScalarValue::from("xy")), - ], - Ok(Some("xyxhi")), - &str, - Utf8, - StringArray + test_lpad!(Some("hi".into()), ScalarValue::Int64(None), Ok(None)); + test_lpad!(None, ScalarValue::Int64(Some(5i64)), Ok(None)); + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(5i64)), + Some("xy".into()), + Ok(Some("xyxhi")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(21i64)), - ColumnarValue::Scalar(ScalarValue::from("abcdef")), - ], - Ok(Some("abcdefabcdefabcdefahi")), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(21i64)), + Some("abcdef".into()), + Ok(Some("abcdefabcdefabcdefahi")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ColumnarValue::Scalar(ScalarValue::from(" ")), - ], - Ok(Some(" hi")), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(5i64)), + Some(" ".into()), + Ok(Some(" hi")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ColumnarValue::Scalar(ScalarValue::from("")), - ], - Ok(Some("hi")), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(5i64)), + Some("".into()), + Ok(Some("hi")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::Utf8(None)), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ColumnarValue::Scalar(ScalarValue::from("xy")), - ], - Ok(None), - &str, - Utf8, - StringArray + test_lpad!( + None, + ScalarValue::Int64(Some(5i64)), + Some("xy".into()), + Ok(None) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::Int64(None)), - ColumnarValue::Scalar(ScalarValue::from("xy")), - ], - Ok(None), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(None), + Some("xy".into()), + Ok(None) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("hi")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ColumnarValue::Scalar(ScalarValue::Utf8(None)), - ], - Ok(None), - &str, - Utf8, - StringArray + test_lpad!( + Some("hi".into()), + ScalarValue::Int64(Some(5i64)), + None, + Ok(None) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("josé")), - 
ColumnarValue::Scalar(ScalarValue::from(10i64)), - ColumnarValue::Scalar(ScalarValue::from("xy")), - ], - Ok(Some("xyxyxyjosé")), - &str, - Utf8, - StringArray + test_lpad!( + Some("josé".into()), + ScalarValue::Int64(Some(10i64)), + Some("xy".into()), + Ok(Some("xyxyxyjosé")) ); - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("josé")), - ColumnarValue::Scalar(ScalarValue::from(10i64)), - ColumnarValue::Scalar(ScalarValue::from("éñ")), - ], - Ok(Some("éñéñéñjosé")), - &str, - Utf8, - StringArray + test_lpad!( + Some("josé".into()), + ScalarValue::Int64(Some(10i64)), + Some("éñ".into()), + Ok(Some("éñéñéñjosé")) ); + #[cfg(not(feature = "unicode_expressions"))] - test_function!( - LPadFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::from("josé")), - ColumnarValue::Scalar(ScalarValue::from(5i64)), - ], - internal_err!( + test_lpad!(Some("josé".into()), ScalarValue::Int64(Some(5i64)), internal_err!( "function lpad requires compilation with feature flag: unicode_expressions." - ), - &str, - Utf8, - StringArray - ); + )); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 3255ddccdb81f..bea3016a21d32 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -203,6 +203,32 @@ SELECT lpad(NULL, 5, 'xy') ---- NULL +# test largeutf8, utf8view for lpad +query T +SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy') +---- +xyxhi + +query T +SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, 'xy') +---- +xyxhi + +query T +SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8')) +---- +xyxhi + +query T +SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View')) +---- +xyxhi + +query T +SELECT lpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy') +---- +NULL + query T SELECT reverse('abcde') ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index fcd71b7f7e943..0088b035e7774 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -697,16 +697,32 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for LPAD -## TODO https://github.com/apache/datafusion/issues/11857 query TT EXPLAIN SELECT LPAD(column1_utf8view, 12, ' ') as c1 FROM test; ---- logical_plan -01)Projection: lpad(CAST(test.column1_utf8view AS Utf8), Int64(12), Utf8(" ")) AS c1 +01)Projection: lpad(test.column1_utf8view, Int64(12), Utf8(" ")) AS c1 02)--TableScan: test projection=[column1_utf8view] +query TT +EXPLAIN SELECT + LPAD(column1_utf8view, 12, column2_large_utf8) as c1 +FROM test; +---- +logical_plan +01)Projection: lpad(test.column1_utf8view, Int64(12), test.column2_large_utf8) AS c1 +02)--TableScan: test projection=[column2_large_utf8, column1_utf8view] + +query TT +EXPLAIN SELECT + LPAD(column1_utf8view, 12, column2_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: lpad(test.column1_utf8view, Int64(12), test.column2_utf8view) AS c1 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for OCTET_LENGTH query TT From e8ac93a0ffd490bd549872366bf7fd1b77ffd7f8 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Wed, 14 Aug 2024 02:27:03 +0800 Subject: [PATCH 288/357] Add native stringview support for LTRIM & RTRIM (#11948) * add stringview option for ltrim * add stringview 
option for rtrim * add some tests to ensure no casts for ltrim & rtrim when using stringview * fix typo and remove useless comments * add tests covering ltrim and rtrim functioning --- datafusion/functions/src/string/btrim.rs | 3 +- datafusion/functions/src/string/ltrim.rs | 20 ++- datafusion/functions/src/string/rtrim.rs | 20 ++- .../sqllogictest/test_files/string_view.slt | 128 +++++++++++++----- 4 files changed, 126 insertions(+), 45 deletions(-) diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 86470dd7a6469..371a11c82c543 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -57,7 +57,6 @@ impl BTrimFunc { // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. // If that fails, it proceeds to `(Utf8, Utf8)`. Exact(vec![Utf8View, Utf8View]), - // Exact(vec![Utf8, Utf8View]), Exact(vec![Utf8, Utf8]), Exact(vec![Utf8View]), Exact(vec![Utf8]), @@ -98,7 +97,7 @@ impl ScalarUDFImpl for BTrimFunc { )(args), other => exec_err!( "Unsupported data type {other:?} for function btrim,\ - expected for Utf8, LargeUtf8 or Utf8View." + expected Utf8, LargeUtf8 or Utf8View." ), } } diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 6a9fafdd9299a..b7b27afcee1fa 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed. /// ltrim('zzzytest', 'xyz') = 'test' fn ltrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Left, false) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Left, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl LtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for LtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for LtrimFunc { ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function ltrim"), + other => exec_err!( + "Unsupported data type {other:?} for function ltrim,\ + expected Utf8, LargeUtf8 or Utf8View." + ), } } } diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index 50b626e3df0e9..ec53f3ed74307 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. 
/// rtrim('testxxzx', 'xyz') = 'test' fn rtrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Right, false) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Right, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl RtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for RtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for RtrimFunc { rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function rtrim"), + other => exec_err!( + "Unsupported data type {other:?} for function rtrim,\ + expected Utf8, LargeUtf8 or Utf8View." + ), } } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 0088b035e7774..2381bd122bdda 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -607,6 +607,99 @@ Xiangpeng Xiangpeng Xiangpeng NULL Raphael Raphael Raphael NULL NULL NULL NULL NULL +## Ensure no casts for LTRIM +# Test LTRIM with Utf8View input +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM outputs +query TTTTT +SELECT + LTRIM(column1_utf8view, 'foo') AS l1, + LTRIM(column1_utf8view, column2_utf8view) AS l2, + LTRIM(column1_utf8view) AS l3, + LTRIM(column1_utf8view, NULL) AS l4, + LTRIM(column1_utf8view, 'Xiang') AS l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL peng +Raphael aphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + +## ensure no casts for RTRIM +# Test RTRIM with Utf8View input +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test 
projection=[column1_utf8view] + +# Test RTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM outputs +query TTTTT +SELECT + RTRIM(column1_utf8view, 'foo') AS l1, + RTRIM(column1_utf8view, column2_utf8view) AS l2, + RTRIM(column1_utf8view) AS l3, + RTRIM(column1_utf8view, NULL) AS l4, + RTRIM(column1_utf8view, 'peng') As l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL Xia +Raphael Raphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + + ## Ensure no casts for CHARACTER_LENGTH query TT EXPLAIN SELECT @@ -685,16 +778,6 @@ logical_plan 01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for LTRIM -## TODO https://github.com/apache/datafusion/issues/11856 -query TT -EXPLAIN SELECT - LTRIM(column1_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1 -02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for LPAD query TT @@ -811,18 +894,6 @@ logical_plan 01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c1, - RTRIM(column1_utf8view, 'foo') as c2 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 -03)----TableScan: test projection=[column1_utf8view] ## Ensure no casts for RIGHT ## TODO file ticket @@ -849,19 +920,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c, - RTRIM(column1_utf8view, column2_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] - ## Ensure no casts for SPLIT_PART ## TODO file ticket query TT From 3438b355308afa23dba399f4aec5760969d054c5 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 14 Aug 2024 02:28:02 +0800 Subject: [PATCH 289/357] Move wildcard expansions to the analyzer (#11681) * allow qualified wildcard in the logical plan * move wildcard expansions to the analyzer * fix fmt * fix the view tests * expand wildcard for schema * fix for union query * cargo fmt clippy * move wildcard expanding tests to expand_wildcard_rule.rs * coercion the expanded wildcard expression in union * remove debug message * move wildcard options to logical plan * remove unused function * add the doc for expression function * fix cargo check * fix cargo fmt * fix test * extract expand_exprlist * expand wildcard for functional_dependencies * refine the doc * fix tests * fix expand exclude and except * remove unused import * fix check and update function * fix check * throw the error when exprlist to field * fix functional_dependency and exclude * fix projection_schema * fix the window functions * fix clippy and 
support unparsing wildcard * fix clippy and fmt * add the doc for util functions * fix unique expression check for projection * cargo fmt * move test and solve dependency issue * address review comments * add the fail reason * enhance the doc * add more doc --- datafusion/core/src/datasource/view.rs | 44 ++- datafusion/core/src/execution/context/mod.rs | 1 - datafusion/expr/src/expr.rs | 221 ++++++++++++- datafusion/expr/src/expr_fn.rs | 45 ++- datafusion/expr/src/expr_rewriter/mod.rs | 1 + datafusion/expr/src/expr_schema.rs | 21 +- datafusion/expr/src/logical_plan/builder.rs | 41 +-- datafusion/expr/src/logical_plan/plan.rs | 55 +++- datafusion/expr/src/utils.rs | 150 ++++++++- .../src/analyzer/count_wildcard_rule.rs | 8 +- .../src/analyzer/expand_wildcard_rule.rs | 304 ++++++++++++++++++ .../src/analyzer/inline_table_scan.rs | 8 +- datafusion/optimizer/src/analyzer/mod.rs | 5 + .../optimizer/src/analyzer/type_coercion.rs | 47 ++- .../optimizer/tests/optimizer_integration.rs | 21 ++ .../proto/src/logical_plan/from_proto.rs | 7 +- datafusion/proto/src/logical_plan/to_proto.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 8 +- datafusion/sql/src/expr/function.rs | 13 +- datafusion/sql/src/expr/mod.rs | 12 +- datafusion/sql/src/select.rs | 120 +++---- datafusion/sql/src/unparser/expr.rs | 54 +++- datafusion/sql/tests/sql_integration.rs | 78 ++--- .../sqllogictest/test_files/explain.slt | 1 + datafusion/sqllogictest/test_files/select.slt | 6 +- datafusion/sqllogictest/test_files/union.slt | 35 ++ datafusion/sqllogictest/test_files/window.slt | 3 +- 27 files changed, 1057 insertions(+), 254 deletions(-) create mode 100644 datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 98d118c027b7a..a81942bf769e8 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -19,17 +19,19 @@ use std::{any::Any, sync::Arc}; -use arrow::datatypes::SchemaRef; -use async_trait::async_trait; -use datafusion_catalog::Session; -use datafusion_common::Column; -use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; - use crate::{ error::Result, logical_expr::{Expr, LogicalPlan}, physical_plan::ExecutionPlan, }; +use arrow::datatypes::SchemaRef; +use async_trait::async_trait; +use datafusion_catalog::Session; +use datafusion_common::config::ConfigOptions; +use datafusion_common::Column; +use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; +use datafusion_optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule; +use datafusion_optimizer::Analyzer; use crate::datasource::{TableProvider, TableType}; @@ -50,6 +52,7 @@ impl ViewTable { logical_plan: LogicalPlan, definition: Option, ) -> Result { + let logical_plan = Self::apply_required_rule(logical_plan)?; let table_schema = logical_plan.schema().as_ref().to_owned().into(); let view = Self { @@ -61,6 +64,15 @@ impl ViewTable { Ok(view) } + fn apply_required_rule(logical_plan: LogicalPlan) -> Result { + let options = ConfigOptions::default(); + Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]).execute_and_check( + logical_plan, + &options, + |_, _| {}, + ) + } + /// Get definition ref pub fn definition(&self) -> Option<&String> { self.definition.as_ref() @@ -232,6 +244,26 @@ mod tests { assert_batches_eq!(expected, &results); + let view_sql = + "CREATE VIEW replace_xyz AS SELECT * REPLACE (column1*2 as column1) FROM xyz"; + 
session_ctx.sql(view_sql).await?.collect().await?; + + let results = session_ctx + .sql("SELECT * FROM replace_xyz") + .await? + .collect() + .await?; + + let expected = [ + "+---------+---------+---------+", + "| column1 | column2 | column3 |", + "+---------+---------+---------+", + "| 2 | 2 | 3 |", + "| 8 | 5 | 6 |", + "+---------+---------+---------+", + ]; + + assert_batches_eq!(expected, &results); Ok(()) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index c63ffddd81b31..972a6f643733f 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -718,7 +718,6 @@ impl SessionContext { } (_, Err(_)) => { let table = Arc::new(ViewTable::try_new((*input).clone(), definition)?); - self.register_table(name, table)?; self.return_empty_dataframe() } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 5030a95d3c8ab..b4d489cc7c1e5 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -41,7 +41,10 @@ use datafusion_common::{ internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; -use sqlparser::ast::NullTreatment; +use sqlparser::ast::{ + display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, + NullTreatment, RenameSelectItem, ReplaceSelectElement, +}; /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. /// @@ -315,7 +318,10 @@ pub enum Expr { /// /// This expr has to be resolved to a list of columns before translating logical /// plan into physical plan. - Wildcard { qualifier: Option }, + Wildcard { + qualifier: Option, + options: WildcardOptions, + }, /// List of grouping set expressions. Only valid in the context of an aggregate /// GROUP BY expression list GroupingSet(GroupingSet), @@ -970,6 +976,89 @@ impl GroupingSet { } } +/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`. +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct WildcardOptions { + /// `[ILIKE...]`. + /// Snowflake syntax: + pub ilike: Option, + /// `[EXCLUDE...]`. + /// Snowflake syntax: + pub exclude: Option, + /// `[EXCEPT...]`. + /// BigQuery syntax: + /// Clickhouse syntax: + pub except: Option, + /// `[REPLACE]` + /// BigQuery syntax: + /// Clickhouse syntax: + /// Snowflake syntax: + pub replace: Option, + /// `[RENAME ...]`. + /// Snowflake syntax: + pub rename: Option, +} + +impl WildcardOptions { + pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self { + WildcardOptions { + ilike: self.ilike, + exclude: self.exclude, + except: self.except, + replace: Some(replace), + rename: self.rename, + } + } +} + +impl Display for WildcardOptions { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if let Some(ilike) = &self.ilike { + write!(f, " {ilike}")?; + } + if let Some(exclude) = &self.exclude { + write!(f, " {exclude}")?; + } + if let Some(except) = &self.except { + write!(f, " {except}")?; + } + if let Some(replace) = &self.replace { + write!(f, " {replace}")?; + } + if let Some(rename) = &self.rename { + write!(f, " {rename}")?; + } + Ok(()) + } +} + +/// The planned expressions for `REPLACE` +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct PlannedReplaceSelectItem { + /// The original ast nodes + pub items: Vec, + /// The expression planned from the ast nodes. They will be used when expanding the wildcard. 
+ pub planned_expressions: Vec, +} + +impl Display for PlannedReplaceSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "REPLACE")?; + write!(f, " ({})", display_comma_separated(&self.items))?; + Ok(()) + } +} + +impl PlannedReplaceSelectItem { + pub fn items(&self) -> &[ReplaceSelectElement] { + &self.items + } + + pub fn expressions(&self) -> &[Expr] { + &self.planned_expressions + } +} + /// Fixed seed for the hashing so that Ords are consistent across runs const SEED: ahash::RandomState = ahash::RandomState::with_seeds(0, 0, 0, 0); @@ -1720,8 +1809,9 @@ impl Expr { Expr::ScalarSubquery(subquery) => { subquery.hash(hasher); } - Expr::Wildcard { qualifier } => { + Expr::Wildcard { qualifier, options } => { qualifier.hash(hasher); + options.hash(hasher); } Expr::GroupingSet(grouping_set) => { mem::discriminant(grouping_set).hash(hasher); @@ -2242,9 +2332,9 @@ impl fmt::Display for Expr { write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list)) } } - Expr::Wildcard { qualifier } => match qualifier { - Some(qualifier) => write!(f, "{qualifier}.*"), - None => write!(f, "*"), + Expr::Wildcard { qualifier, options } => match qualifier { + Some(qualifier) => write!(f, "{qualifier}.*{options}"), + None => write!(f, "*{options}"), }, Expr::GroupingSet(grouping_sets) => match grouping_sets { GroupingSet::Rollup(exprs) => { @@ -2543,9 +2633,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Expr::Sort { .. } => { internal_err!("Create physical name does not support sort expression") } - Expr::Wildcard { .. } => { - internal_err!("Create physical name does not support wildcard") - } + Expr::Wildcard { qualifier, options } => match qualifier { + Some(qualifier) => Ok(format!("{}.*{}", qualifier, options)), + None => Ok(format!("*{}", options)), + }, Expr::Placeholder(_) => { internal_err!("Create physical name does not support placeholder") } @@ -2558,7 +2649,12 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { #[cfg(test)] mod test { use crate::expr_fn::col; - use crate::{case, lit, ColumnarValue, ScalarUDF, ScalarUDFImpl, Volatility}; + use crate::{ + case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, + ScalarUDF, ScalarUDFImpl, Volatility, + }; + use sqlparser::ast; + use sqlparser::ast::{Ident, IdentWithAlias}; use std::any::Any; #[test] @@ -2859,4 +2955,109 @@ mod test { ); assert_eq!(find_df_window_func("not_exist"), None) } + + #[test] + fn test_display_wildcard() { + assert_eq!(format!("{}", wildcard()), "*"); + assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*"); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + Some(IlikeSelectItem { + pattern: "c1".to_string() + }), + None, + None, + None, + None + )) + ), + "* ILIKE 'c1'" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + Some(ExcludeSelectItem::Multiple(vec![ + Ident::from("c1"), + Ident::from("c2") + ])), + None, + None, + None + )) + ), + "* EXCLUDE (c1, c2)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + Some(ExceptSelectItem { + first_element: Ident::from("c1"), + additional_elements: vec![Ident::from("c2")] + }), + None, + None + )) + ), + "* EXCEPT (c1, c2)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + None, + Some(PlannedReplaceSelectItem { + items: vec![ReplaceSelectElement { + expr: ast::Expr::Identifier(Ident::from("c1")), + column_name: Ident::from("a1"), + 
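// `as_keyword: false` makes the item render as `c1 a1` (no `AS`), matching the
// expected `* REPLACE (c1 a1)` string below.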
as_keyword: false + }], + planned_expressions: vec![] + }), + None + )) + ), + "* REPLACE (c1 a1)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + None, + None, + Some(RenameSelectItem::Multiple(vec![IdentWithAlias { + ident: Ident::from("c1"), + alias: Ident::from("a1") + }])) + )) + ), + "* RENAME (c1 AS a1)" + ) + } + + fn wildcard_options( + opt_ilike: Option, + opt_exclude: Option, + opt_except: Option, + opt_replace: Option, + opt_rename: Option, + ) -> WildcardOptions { + WildcardOptions { + ilike: opt_ilike, + exclude: opt_exclude, + except: opt_except, + replace: opt_replace, + rename: opt_rename, + } + } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index e9c5485656c8c..4e6022399653b 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, WindowFunction, + Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -37,7 +37,7 @@ use arrow::compute::kernels::cast_utils::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, }; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{plan_err, Column, Result, ScalarValue}; +use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; @@ -119,7 +119,46 @@ pub fn placeholder(id: impl Into) -> Expr { /// assert_eq!(p.to_string(), "*") /// ``` pub fn wildcard() -> Expr { - Expr::Wildcard { qualifier: None } + Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + } +} + +/// Create an '*' [`Expr::Wildcard`] expression with the wildcard options +pub fn wildcard_with_options(options: WildcardOptions) -> Expr { + Expr::Wildcard { + qualifier: None, + options, + } +} + +/// Create an 't.*' [`Expr::Wildcard`] expression that matches all columns from a specific table +/// +/// # Example +/// +/// ```rust +/// # use datafusion_common::TableReference; +/// # use datafusion_expr::{qualified_wildcard}; +/// let p = qualified_wildcard(TableReference::bare("t")); +/// assert_eq!(p.to_string(), "t.*") +/// ``` +pub fn qualified_wildcard(qualifier: impl Into) -> Expr { + Expr::Wildcard { + qualifier: Some(qualifier.into()), + options: WildcardOptions::default(), + } +} + +/// Create an 't.*' [`Expr::Wildcard`] expression with the wildcard options +pub fn qualified_wildcard_with_options( + qualifier: impl Into, + options: WildcardOptions, +) -> Expr { + Expr::Wildcard { + qualifier: Some(qualifier.into()), + options, + } } /// Return a new expression `left right` diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 0dc41d4a9ac1a..32e621350ee24 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -248,6 +248,7 @@ fn coerce_exprs_for_schema( Expr::Alias(Alias { expr, name, .. }) => { Ok(expr.cast_to(new_type, src_schema)?.alias(name)) } + Expr::Wildcard { .. 
} => Ok(expr), _ => expr.cast_to(new_type, src_schema), } } else { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 73123819ba99a..af35b9a9910d7 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,8 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field}; use datafusion_common::{ - internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, - Result, TableReference, + not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, + TableReference, }; use std::collections::HashMap; use std::sync::Arc; @@ -244,13 +244,7 @@ impl ExprSchemable for Expr { ) }) } - Expr::Wildcard { qualifier } => { - // Wildcard do not really have a type and do not appear in projections - match qualifier { - Some(_) => internal_err!("QualifiedWildcard expressions are not valid in a logical query plan"), - None => Ok(DataType::Null) - } - } + Expr::Wildcard { .. } => Ok(DataType::Null), Expr::GroupingSet(_) => { // grouping sets do not really have a type and do not appear in projections Ok(DataType::Null) @@ -362,12 +356,7 @@ impl ExprSchemable for Expr { | Expr::SimilarTo(Like { expr, pattern, .. }) => { Ok(expr.nullable(input_schema)? || pattern.nullable(input_schema)?) } - Expr::Wildcard { qualifier } => match qualifier { - Some(_) => internal_err!( - "QualifiedWildcard expressions are not valid in a logical query plan" - ), - None => Ok(false), - }, + Expr::Wildcard { .. } => Ok(false), Expr::GroupingSet(_) => { // grouping sets do not really have the concept of nullable and do not appear // in projections @@ -548,7 +537,7 @@ mod tests { use super::*; use crate::{col, lit}; - use datafusion_common::{DFSchema, ScalarValue}; + use datafusion_common::{internal_err, DFSchema, ScalarValue}; macro_rules! test_is_expr_nullable { ($EXPR_TYPE:ident) => {{ diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 4ef346656ff40..e95fcdd128ede 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -38,9 +38,8 @@ use crate::logical_plan::{ }; use crate::type_coercion::binary::{comparison_coercion, values_coercion}; use crate::utils::{ - can_hash, columnize_expr, compare_sort_expr, expand_qualified_wildcard, - expand_wildcard, expr_to_columns, find_valid_equijoin_key_pair, - group_window_expr_by_sort_keys, + can_hash, columnize_expr, compare_sort_expr, expr_to_columns, + find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, }; use crate::{ and, binary_expr, logical_plan::tree_node::unwrap_arc, DmlStatement, Expr, @@ -1316,7 +1315,7 @@ fn add_group_by_exprs_from_dependencies( Ok(group_expr) } /// Errors if one or more expressions have equal names. -pub(crate) fn validate_unique_names<'a>( +pub fn validate_unique_names<'a>( node_name: &str, expressions: impl IntoIterator, ) -> Result<()> { @@ -1356,6 +1355,7 @@ pub fn project_with_column_index( ref name, }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), Expr::Alias { .. } | Expr::Column { .. } => e, + Expr::Wildcard { .. 
} => e, _ => e.alias(schema.field(i).name()), }) .collect::>(); @@ -1440,22 +1440,11 @@ pub fn project( plan: LogicalPlan, expr: impl IntoIterator>, ) -> Result { - // TODO: move it into analyzer - let input_schema = plan.schema(); let mut projected_expr = vec![]; for e in expr { let e = e.into(); match e { - Expr::Wildcard { qualifier: None } => { - projected_expr.extend(expand_wildcard(input_schema, &plan, None)?) - } - Expr::Wildcard { - qualifier: Some(qualifier), - } => projected_expr.extend(expand_qualified_wildcard( - &qualifier, - input_schema, - None, - )?), + Expr::Wildcard { .. } => projected_expr.push(e), _ => projected_expr.push(columnize_expr(normalize_col(e, &plan)?, &plan)?), } } @@ -1807,26 +1796,6 @@ mod tests { Ok(()) } - #[test] - fn plan_using_join_wildcard_projection() -> Result<()> { - let t2 = table_scan(Some("t2"), &employee_schema(), None)?.build()?; - - let plan = table_scan(Some("t1"), &employee_schema(), None)? - .join_using(t2, JoinType::Inner, vec!["id"])? - .project(vec![Expr::Wildcard { qualifier: None }])? - .build()?; - - // id column should only show up once in projection - let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary, t2.first_name, t2.last_name, t2.state, t2.salary\ - \n Inner Join: Using t1.id = t2.id\ - \n TableScan: t1\ - \n TableScan: t2"; - - assert_eq!(expected, format!("{plan}")); - - Ok(()) - } - #[test] fn plan_builder_union() -> Result<()> { let plan = diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index c5538d8880a7f..2bab6d516a73e 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -31,8 +31,9 @@ use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; use crate::logical_plan::{DmlStatement, Statement}; use crate::utils::{ - enumerate_grouping_sets, exprlist_to_fields, find_out_reference_exprs, - grouping_set_expr_count, grouping_set_to_exprlist, split_conjunction, + enumerate_grouping_sets, exprlist_len, exprlist_to_fields, find_base_plan, + find_out_reference_exprs, grouping_set_expr_count, grouping_set_to_exprlist, + split_conjunction, }; use crate::{ build_join_schema, expr_vec_fmt, BinaryExpr, BuiltInWindowFunction, @@ -1977,7 +1978,9 @@ impl Projection { input: Arc, schema: DFSchemaRef, ) -> Result { - if expr.len() != schema.fields().len() { + if !expr.iter().any(|e| matches!(e, Expr::Wildcard { .. })) + && expr.len() != schema.fields().len() + { return plan_err!("Projection has mismatch between number of expressions ({}) and number of fields in schema ({})", expr.len(), schema.fields().len()); } Ok(Self { @@ -2763,20 +2766,48 @@ fn calc_func_dependencies_for_project( // Calculate expression indices (if present) in the input schema. 
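// A wildcard can stand for several input columns, so it is first expanded to the
// concrete fields it covers (via `exprlist_to_fields`) before their positions are
// looked up; the projection length is computed the same way with `exprlist_len`.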
let proj_indices = exprs .iter() - .filter_map(|expr| { - let expr_name = match expr { - Expr::Alias(alias) => { - format!("{}", alias.expr) - } - _ => format!("{}", expr), - }; - input_fields.iter().position(|item| *item == expr_name) + .map(|expr| match expr { + Expr::Wildcard { qualifier, options } => { + let wildcard_fields = exprlist_to_fields( + vec![&Expr::Wildcard { + qualifier: qualifier.clone(), + options: options.clone(), + }], + input, + )?; + Ok::<_, DataFusionError>( + wildcard_fields + .into_iter() + .filter_map(|(qualifier, f)| { + let flat_name = qualifier + .map(|t| format!("{}.{}", t, f.name())) + .unwrap_or(f.name().clone()); + input_fields.iter().position(|item| *item == flat_name) + }) + .collect::>(), + ) + } + Expr::Alias(alias) => Ok(input_fields + .iter() + .position(|item| *item == format!("{}", alias.expr)) + .map(|i| vec![i]) + .unwrap_or(vec![])), + _ => Ok(input_fields + .iter() + .position(|item| *item == format!("{}", expr)) + .map(|i| vec![i]) + .unwrap_or(vec![])), }) + .collect::>>()? + .into_iter() + .flatten() .collect::>(); + + let len = exprlist_len(exprs, input.schema(), Some(find_base_plan(input).schema()))?; Ok(input .schema() .functional_dependencies() - .project_functional_dependencies(&proj_indices, exprs.len())) + .project_functional_dependencies(&proj_indices, len)) } /// Sorts its input according to a list of sort expressions. diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7b650d1ab448b..4db5061e8fe7d 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -21,7 +21,7 @@ use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use crate::expr::{Alias, Sort, WindowFunction}; +use crate::expr::{Alias, Sort, WildcardOptions, WindowFunction}; use crate::expr_rewriter::strip_outer_reference; use crate::{ and, BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, @@ -34,11 +34,11 @@ use datafusion_common::tree_node::{ }; use datafusion_common::utils::get_at_indices; use datafusion_common::{ - internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, Result, - TableReference, + internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, + DataFusionError, Result, TableReference, }; -use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions}; +use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem}; pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; @@ -377,7 +377,7 @@ fn get_exprs_except_skipped( pub fn expand_wildcard( schema: &DFSchema, plan: &LogicalPlan, - wildcard_options: Option<&WildcardAdditionalOptions>, + wildcard_options: Option<&WildcardOptions>, ) -> Result> { let using_columns = plan.using_columns()?; let mut columns_to_skip = using_columns @@ -401,9 +401,9 @@ pub fn expand_wildcard( .collect::>() }) .collect::>(); - let excluded_columns = if let Some(WildcardAdditionalOptions { - opt_exclude, - opt_except, + let excluded_columns = if let Some(WildcardOptions { + exclude: opt_exclude, + except: opt_except, .. 
}) = wildcard_options { @@ -420,7 +420,7 @@ pub fn expand_wildcard( pub fn expand_qualified_wildcard( qualifier: &TableReference, schema: &DFSchema, - wildcard_options: Option<&WildcardAdditionalOptions>, + wildcard_options: Option<&WildcardOptions>, ) -> Result> { let qualified_indices = schema.fields_indices_with_qualified(qualifier); let projected_func_dependencies = schema @@ -435,9 +435,9 @@ pub fn expand_qualified_wildcard( let qualified_dfschema = DFSchema::try_from_qualified_schema(qualifier.clone(), &qualified_schema)? .with_functional_dependencies(projected_func_dependencies)?; - let excluded_columns = if let Some(WildcardAdditionalOptions { - opt_exclude, - opt_except, + let excluded_columns = if let Some(WildcardOptions { + exclude: opt_exclude, + except: opt_except, .. }) = wildcard_options { @@ -731,11 +731,129 @@ pub fn exprlist_to_fields<'a>( plan: &LogicalPlan, ) -> Result, Arc)>> { // look for exact match in plan's output schema - let input_schema = &plan.schema(); - exprs + let wildcard_schema = find_base_plan(plan).schema(); + let input_schema = plan.schema(); + let result = exprs .into_iter() - .map(|e| e.to_field(input_schema)) - .collect() + .map(|e| match e { + Expr::Wildcard { qualifier, options } => match qualifier { + None => { + let excluded: Vec = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema, + None, + )? + .into_iter() + .map(|c| c.flat_name()) + .collect(); + Ok::<_, DataFusionError>( + wildcard_schema + .field_names() + .iter() + .enumerate() + .filter(|(_, s)| !excluded.contains(s)) + .map(|(i, _)| wildcard_schema.qualified_field(i)) + .map(|(qualifier, f)| { + (qualifier.cloned(), Arc::new(f.to_owned())) + }) + .collect::>(), + ) + } + Some(qualifier) => { + let excluded: Vec = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema, + Some(qualifier), + )? + .into_iter() + .map(|c| c.flat_name()) + .collect(); + Ok(wildcard_schema + .fields_with_qualified(qualifier) + .into_iter() + .filter_map(|field| { + let flat_name = format!("{}.{}", qualifier, field.name()); + if excluded.contains(&flat_name) { + None + } else { + Some(( + Some(qualifier.clone()), + Arc::new(field.to_owned()), + )) + } + }) + .collect::>()) + } + }, + _ => Ok(vec![e.to_field(input_schema)?]), + }) + .collect::>>()? + .into_iter() + .flatten() + .collect(); + Ok(result) +} + +/// Find the suitable base plan to expand the wildcard expression recursively. +/// When planning [LogicalPlan::Window] and [LogicalPlan::Aggregate], we will generate +/// an intermediate plan based on the relation plan (e.g. [LogicalPlan::TableScan], [LogicalPlan::Subquery], ...). +/// If we expand a wildcard expression basing the intermediate plan, we could get some duplicate fields. +pub fn find_base_plan(input: &LogicalPlan) -> &LogicalPlan { + match input { + LogicalPlan::Window(window) => find_base_plan(&window.input), + LogicalPlan::Aggregate(agg) => find_base_plan(&agg.input), + _ => input, + } +} + +/// Count the number of real fields. We should expand the wildcard expression to get the actual number. +pub fn exprlist_len( + exprs: &[Expr], + schema: &DFSchemaRef, + wildcard_schema: Option<&DFSchemaRef>, +) -> Result { + exprs + .iter() + .map(|e| match e { + Expr::Wildcard { + qualifier: None, + options, + } => { + let excluded = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema.unwrap_or(schema), + None, + )? 
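// Drop the EXCLUDE/EXCEPT columns, then count whatever the wildcard still expands
// to against the wildcard schema (or the plan schema when no base plan is given).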
+ .into_iter() + .collect::>(); + Ok( + get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) + .len(), + ) + } + Expr::Wildcard { + qualifier: Some(qualifier), + options, + } => { + let excluded = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema.unwrap_or(schema), + Some(qualifier), + )? + .into_iter() + .collect::>(); + Ok( + get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) + .len(), + ) + } + _ => Ok(1), + }) + .sum() } /// Convert an expression into Column expression if it's already provided as input plan. diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 8ff00917dcb13..593dab2bc9a21 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -48,7 +48,13 @@ impl AnalyzerRule for CountWildcardRule { } fn is_wildcard(expr: &Expr) -> bool { - matches!(expr, Expr::Wildcard { qualifier: None }) + matches!( + expr, + Expr::Wildcard { + qualifier: None, + .. + } + ) } fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool { diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs new file mode 100644 index 0000000000000..53ba3042f522e --- /dev/null +++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs @@ -0,0 +1,304 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use crate::AnalyzerRule; +use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::{Transformed, TransformedResult}; +use datafusion_common::{Column, Result}; +use datafusion_expr::builder::validate_unique_names; +use datafusion_expr::expr::PlannedReplaceSelectItem; +use datafusion_expr::utils::{ + expand_qualified_wildcard, expand_wildcard, find_base_plan, +}; +use datafusion_expr::{Expr, LogicalPlan, Projection, SubqueryAlias}; + +#[derive(Default)] +pub struct ExpandWildcardRule {} + +impl ExpandWildcardRule { + pub fn new() -> Self { + Self {} + } +} + +impl AnalyzerRule for ExpandWildcardRule { + fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result { + // Because the wildcard expansion is based on the schema of the input plan, + // using `transform_up_with_subqueries` here. + plan.transform_up_with_subqueries(expand_internal).data() + } + + fn name(&self) -> &str { + "expand_wildcard_rule" + } +} + +fn expand_internal(plan: LogicalPlan) -> Result> { + match plan { + LogicalPlan::Projection(Projection { expr, input, .. 
}) => { + let projected_expr = expand_exprlist(&input, expr)?; + validate_unique_names("Projections", projected_expr.iter())?; + Ok(Transformed::yes( + Projection::try_new(projected_expr, Arc::clone(&input)) + .map(LogicalPlan::Projection)?, + )) + } + // Teh schema of the plan should also be updated if the child plan is transformed. + LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => { + Ok(Transformed::yes( + SubqueryAlias::try_new(input, alias).map(LogicalPlan::SubqueryAlias)?, + )) + } + _ => Ok(Transformed::no(plan)), + } +} + +fn expand_exprlist(input: &LogicalPlan, expr: Vec) -> Result> { + let mut projected_expr = vec![]; + let input = find_base_plan(input); + for e in expr { + match e { + Expr::Wildcard { qualifier, options } => { + if let Some(qualifier) = qualifier { + let expanded = expand_qualified_wildcard( + &qualifier, + input.schema(), + Some(&options), + )?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } else { + let expanded = + expand_wildcard(input.schema(), input, Some(&options))?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } + } + // A workaround to handle the case when the column name is "*". + // We transform the expression to a Expr::Column through [Column::from_name] in many places. + // It would also convert the wildcard expression to a column expression with name "*". + Expr::Column(Column { + ref relation, + ref name, + }) => { + if name.eq("*") { + if let Some(qualifier) = relation { + projected_expr.extend(expand_qualified_wildcard( + qualifier, + input.schema(), + None, + )?); + } else { + projected_expr.extend(expand_wildcard( + input.schema(), + input, + None, + )?); + } + } else { + projected_expr.push(e.clone()); + } + } + _ => projected_expr.push(e), + } + } + Ok(projected_expr) +} + +/// If there is a REPLACE statement in the projected expression in the form of +/// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces +/// that column with the given replace expression. Column name remains the same. +/// Multiple REPLACEs are also possible with comma separations. +fn replace_columns( + mut exprs: Vec, + replace: PlannedReplaceSelectItem, +) -> Result> { + for expr in exprs.iter_mut() { + if let Expr::Column(Column { name, .. 
}) = expr { + if let Some((_, new_expr)) = replace + .items() + .iter() + .zip(replace.expressions().iter()) + .find(|(item, _)| item.column_name.value == *name) + { + *expr = new_expr.clone().alias(name.clone()) + } + } + } + Ok(exprs) +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::{DataType, Field, Schema}; + + use datafusion_common::{JoinType, TableReference}; + use datafusion_expr::{ + col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, + }; + + use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; + use crate::Analyzer; + + use super::*; + + fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { + assert_analyzed_plan_eq_display_indent( + Arc::new(ExpandWildcardRule::new()), + plan, + expected, + ) + } + + #[test] + fn test_expand_wildcard() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![wildcard()])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_qualified_wildcard() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![qualified_wildcard(TableReference::bare("test"))])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_qualified_wildcard_in_subquery() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![qualified_wildcard(TableReference::bare("test"))])? + .build()?; + let plan = LogicalPlanBuilder::from(plan) + .project(vec![wildcard()])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_wildcard_in_subquery() -> Result<()> { + let projection_a = LogicalPlanBuilder::from(test_table_scan()?) + .project(vec![col("a")])? + .build()?; + let subquery = LogicalPlanBuilder::from(projection_a) + .project(vec![wildcard()])? + .build()?; + let plan = LogicalPlanBuilder::from(test_table_scan()?) + .filter(in_subquery(col("a"), Arc::new(subquery)))? + .project(vec![wildcard()])? + .build()?; + let expected = "\ + Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n Filter: test.a IN () [a:UInt32, b:UInt32, c:UInt32]\ + \n Subquery: [a:UInt32]\ + \n Projection: test.a [a:UInt32]\ + \n Projection: test.a [a:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_subquery_schema() -> Result<()> { + let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); + let options = ConfigOptions::default(); + let subquery = LogicalPlanBuilder::from(test_table_scan()?) + .project(vec![wildcard()])? + .build()?; + let plan = LogicalPlanBuilder::from(subquery) + .alias("sub")? + .project(vec![wildcard()])? 
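// After `ExpandWildcardRule` runs, every input schema of the analyzed plan should
// contain only concrete column names; the loop below asserts that no "*" remains.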
+ .build()?; + let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; + for x in analyzed_plan.inputs() { + for field in x.schema().fields() { + assert_ne!(field.name(), "*"); + } + } + Ok(()) + } + + fn employee_schema() -> Schema { + Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + Field::new("state", DataType::Utf8, false), + Field::new("salary", DataType::Int32, false), + ]) + } + + #[test] + fn plan_using_join_wildcard_projection() -> Result<()> { + let t2 = table_scan(Some("t2"), &employee_schema(), None)?.build()?; + + let plan = table_scan(Some("t1"), &employee_schema(), None)? + .join_using(t2, JoinType::Inner, vec!["id"])? + .project(vec![wildcard()])? + .build()?; + + let expected = "Projection: *\ + \n Inner Join: Using t1.id = t2.id\ + \n TableScan: t1\ + \n TableScan: t2"; + + assert_eq!(expected, format!("{plan}")); + + let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); + let options = ConfigOptions::default(); + + let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; + + // id column should only show up once in projection + let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary, t2.first_name, t2.last_name, t2.state, t2.salary\ + \n Inner Join: Using t1.id = t2.id\ + \n TableScan: t1\ + \n TableScan: t2"; + assert_eq!(expected, format!("{analyzed_plan}")); + + Ok(()) + } +} diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 73ab37cb11d85..b69b8410da494 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,6 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; +use datafusion_expr::expr::WildcardOptions; use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder, TableScan}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] @@ -93,7 +94,10 @@ fn generate_projection_expr( ))); } } else { - exprs.push(Expr::Wildcard { qualifier: None }); + exprs.push(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }); } Ok(exprs) } @@ -178,7 +182,7 @@ mod tests { let plan = scan.filter(col("x.a").eq(lit(1)))?.build()?; let expected = "Filter: x.a = Int32(1)\ \n SubqueryAlias: x\ - \n Projection: y.a, y.b\ + \n Projection: *\ \n TableScan: y"; assert_analyzed_plan_eq(Arc::new(InlineTableScan::new()), plan, expected) diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index 91ee8a9e1033a..6e2afeca88c99 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -30,6 +30,7 @@ use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::{Expr, LogicalPlan}; use crate::analyzer::count_wildcard_rule::CountWildcardRule; +use crate::analyzer::expand_wildcard_rule::ExpandWildcardRule; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::analyzer::subquery::check_subquery_expr; use crate::analyzer::type_coercion::TypeCoercion; @@ -38,6 +39,7 @@ use crate::utils::log_plan; use self::function_rewrite::ApplyFunctionRewrites; pub mod count_wildcard_rule; +pub mod expand_wildcard_rule; pub mod 
function_rewrite; pub mod inline_table_scan; pub mod subquery; @@ -89,6 +91,9 @@ impl Analyzer { pub fn new() -> Self { let rules: Vec> = vec![ Arc::new(InlineTableScan::new()), + // Every rule that will generate [Expr::Wildcard] should be placed in front of [ExpandWildcardRule]. + Arc::new(ExpandWildcardRule::new()), + // [Expr::Wildcard] should be expanded before [TypeCoercion] Arc::new(TypeCoercion::new()), Arc::new(CountWildcardRule::new()), ]; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 2bb859d84ad79..7392028ba7aba 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -21,16 +21,20 @@ use std::sync::Arc; use arrow::datatypes::{DataType, IntervalUnit}; +use crate::analyzer::AnalyzerRule; +use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue, }; +use datafusion_expr::builder::project_with_column_index; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, WindowFunction, }; +use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::Subquery; @@ -47,13 +51,10 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, ScalarUDF, - WindowFrame, WindowFrameBound, WindowFrameUnits, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, + Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; -use crate::analyzer::AnalyzerRule; -use crate::utils::NamePreserver; - #[derive(Default)] pub struct TypeCoercion {} @@ -122,6 +123,7 @@ fn analyze_internal( })? // coerce join expressions specially .map_data(|plan| expr_rewrite.coerce_joins(plan))? + .map_data(|plan| expr_rewrite.coerce_union(plan))? // recompute the schema after the expressions have been rewritten as the types may have changed .map_data(|plan| plan.recompute_schema()) } @@ -168,6 +170,39 @@ impl<'a> TypeCoercionRewriter<'a> { Ok(LogicalPlan::Join(join)) } + /// Corece the union inputs after expanding the wildcard expressions + /// + /// Union inputs must have the same schema, so we coerce the expressions to match the schema + /// after expanding the wildcard expressions + fn coerce_union(&self, plan: LogicalPlan) -> Result { + let LogicalPlan::Union(union) = plan else { + return Ok(plan); + }; + + let inputs = union + .inputs + .into_iter() + .map(|p| { + let plan = coerce_plan_expr_for_schema(&p, &union.schema)?; + match plan { + LogicalPlan::Projection(Projection { expr, input, .. 
}) => { + Ok(Arc::new(project_with_column_index( + expr, + input, + Arc::clone(&union.schema), + )?)) + } + other_plan => Ok(Arc::new(other_plan)), + } + }) + .collect::>>()?; + + Ok(LogicalPlan::Union(Union { + inputs, + schema: Arc::clone(&union.schema), + })) + } + fn coerce_join_filter(&self, expr: Expr) -> Result { let expr_type = expr.get_type(self.schema)?; match expr_type { @@ -1286,7 +1321,6 @@ mod test { .eq(cast(lit("1998-03-18"), DataType::Date32)); let empty = empty(); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - dbg!(&plan); let expected = "Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"1998-03-18\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; @@ -1473,7 +1507,6 @@ mod test { )); let empty = empty(); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - dbg!(&plan); let expected = "Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) - CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index aaa5eec3955c7..93dd49b174928 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -335,6 +335,27 @@ fn test_propagate_empty_relation_inner_join_and_unions() { assert_eq!(expected, format!("{plan}")); } +#[test] +fn select_wildcard_with_repeated_column() { + let sql = "SELECT *, col_int32 FROM test"; + let err = test_sql(sql).expect_err("query should have failed"); + assert_eq!( + "expand_wildcard_rule\ncaused by\nError during planning: Projections require unique expression names but the expression \"test.col_int32\" at position 0 and \"test.col_int32\" at position 7 have the same name. Consider aliasing (\"AS\") one of them.", + err.strip_backtrace() + ); +} + +#[test] +fn select_wildcard_with_repeated_column_but_is_aliased() { + let sql = "SELECT *, col_int32 as col_32 FROM test"; + + let plan = test_sql(sql).unwrap(); + let expected = "Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64, test.col_ts_nano_none, test.col_ts_nano_utc, test.col_int32 AS col_32\ + \n TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc]"; + + assert_eq!(expected, format!("{plan}")); +} + fn test_sql(sql: &str) -> Result { // parse the SQL let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... 
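For readers tracing the new flow end to end, here is a minimal sketch of how the pieces above are intended to compose. It mirrors the `expand_wildcard_rule.rs` and `view.rs` tests in this patch; the table name `t` and its two-column schema are purely illustrative and not part of the change.

```rust
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_expr::{table_scan, wildcard};
use datafusion_optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule;
use datafusion_optimizer::Analyzer;

fn expand_star_example() -> Result<()> {
    // An illustrative two-column table; any schema works here.
    let schema = Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ]);

    // `SELECT *` now survives planning as an `Expr::Wildcard`, displayed as `Projection: *` ...
    let plan = table_scan(Some("t"), &schema, None)?
        .project(vec![wildcard()])?
        .build()?;

    // ... and `ExpandWildcardRule` rewrites it to the concrete columns (`t.a, t.b`)
    // during analysis, so no `*` field is left in the analyzed schema.
    let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]);
    let analyzed = analyzer.execute_and_check(plan, &ConfigOptions::default(), |_, _| {})?;
    for field in analyzed.schema().fields() {
        assert_ne!(field.name(), "*");
    }
    Ok(())
}
```

Because `*` now survives until analysis, plans printed before the analyzer runs (as in the updated `sql_integration.rs` expectations below) show `Projection: *` rather than the expanded column list.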
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 6c4c07428bd3b..6cbea5f0cfcce 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -22,8 +22,8 @@ use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, Result, ScalarValue, TableReference, UnnestOptions, }; -use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; +use datafusion_expr::expr::{Unnest, WildcardOptions}; use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ expr::{self, InList, Sort, WindowFunction}, @@ -556,7 +556,10 @@ pub fn parse_expr( ))), ExprType::Wildcard(protobuf::Wildcard { qualifier }) => { let qualifier = qualifier.to_owned().map(|x| x.try_into()).transpose()?; - Ok(Expr::Wildcard { qualifier }) + Ok(Expr::Wildcard { + qualifier, + options: WildcardOptions::default(), + }) } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index ab81ce8af9cb9..c7361c89c328c 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -582,7 +582,7 @@ pub fn serialize_expr( expr_type: Some(ExprType::InList(expr)), } } - Expr::Wildcard { qualifier } => protobuf::LogicalExprNode { + Expr::Wildcard { qualifier, .. } => protobuf::LogicalExprNode { expr_type: Some(ExprType::Wildcard(protobuf::Wildcard { qualifier: qualifier.to_owned().map(|x| x.into()), })), diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index a18fa03b2d151..eb7cc5c4b9c5f 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -58,7 +58,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Sort, Unnest, + Sort, Unnest, WildcardOptions, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -1977,7 +1977,10 @@ fn roundtrip_unnest() { #[test] fn roundtrip_wildcard() { - let test_expr = Expr::Wildcard { qualifier: None }; + let test_expr = Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }; let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); @@ -1987,6 +1990,7 @@ fn roundtrip_wildcard() { fn roundtrip_qualified_wildcard() { let test_expr = Expr::Wildcard { qualifier: Some("foo".into()), + options: WildcardOptions::default(), }; let ctx = SessionContext::new(); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index d16d08b041ae0..b95414a8cafd6 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,6 +22,7 @@ use datafusion_common::{ internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; +use datafusion_expr::expr::WildcardOptions; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, @@ -420,13 +421,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name: _, arg: FunctionArgExpr::Wildcard, operator: _, - } => Ok(Expr::Wildcard { qualifier: None }), + } => Ok(Expr::Wildcard { + qualifier: None, + options: 
WildcardOptions::default(), + }), FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => { self.sql_expr_to_logical_expr(arg, schema, planner_context) } - FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => { - Ok(Expr::Wildcard { qualifier: None }) - } + FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }), _ => not_impl_err!("Unsupported qualified wildcard argument: {sql:?}"), } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index f2b4e0b4e43d5..7c94e5ead5c35 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -27,10 +27,10 @@ use sqlparser::ast::{ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, - ScalarValue, + ScalarValue, TableReference, }; -use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::expr::{InList, WildcardOptions}; use datafusion_expr::{ lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, Operator, TryCast, @@ -661,6 +661,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } + SQLExpr::Wildcard => Ok(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }), + SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + qualifier: Some(TableReference::from(object_name.to_string())), + options: WildcardOptions::default(), + }), SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 95a44dace31a8..339234d9965ca 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -27,23 +27,23 @@ use crate::utils::{ }; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; +use datafusion_common::UnnestOptions; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; -use datafusion_common::{Column, UnnestOptions}; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, }; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::utils::{ - expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, - find_aggregate_exprs, find_window_exprs, + expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ - Aggregate, Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, + qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, + GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, }; use sqlparser::ast::{ Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr, - ReplaceSelectItem, WildcardAdditionalOptions, WindowType, + WildcardAdditionalOptions, WindowType, }; use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; @@ -82,7 +82,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // handle named windows before processing the projection expression check_conflicting_windows(&select.named_window)?; match_window_definitions(&mut select.projection, &select.named_window)?; - // process the SELECT expressions, with wildcards expanded. 
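// (Planner change: wildcards are no longer expanded here; they are kept as
// `Expr::Wildcard` and expanded later by the `ExpandWildcardRule` analyzer rule.)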
+ // process the SELECT expressions let select_exprs = self.prepare_select_exprs( &base_plan, select.projection, @@ -515,8 +515,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } /// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions. - /// - /// Wildcards are expanded into the concrete list of columns. fn prepare_select_exprs( &self, plan: &LogicalPlan, @@ -570,49 +568,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SelectItem::Wildcard(options) => { Self::check_wildcard_options(&options)?; - if empty_from { return plan_err!("SELECT * with no tables specified is not valid"); } - // do not expand from outer schema - let expanded_exprs = - expand_wildcard(plan.schema().as_ref(), plan, Some(&options))?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - if let Some(replace) = options.opt_replace { - self.replace_columns( - plan, - empty_from, - planner_context, - expanded_exprs, - replace, - ) - } else { - Ok(expanded_exprs) - } + let planned_options = self.plan_wildcard_options( + plan, + empty_from, + planner_context, + options, + )?; + Ok(vec![wildcard_with_options(planned_options)]) } SelectItem::QualifiedWildcard(object_name, options) => { Self::check_wildcard_options(&options)?; let qualifier = idents_to_table_reference(object_name.0, false)?; - // do not expand from outer schema - let expanded_exprs = expand_qualified_wildcard( - &qualifier, - plan.schema().as_ref(), - Some(&options), + let planned_options = self.plan_wildcard_options( + plan, + empty_from, + planner_context, + options, )?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - if let Some(replace) = options.opt_replace { - self.replace_columns( - plan, - empty_from, - planner_context, - expanded_exprs, - replace, - ) - } else { - Ok(expanded_exprs) - } + Ok(vec![qualified_wildcard_with_options( + qualifier, + planned_options, + )]) } } } @@ -637,40 +616,44 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } /// If there is a REPLACE statement in the projected expression in the form of - /// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces - /// that column with the given replace expression. Column name remains the same. - /// Multiple REPLACEs are also possible with comma separations. - fn replace_columns( + /// "REPLACE (some_column_within_an_expr AS some_column)", we should plan the + /// replace expressions first. + fn plan_wildcard_options( &self, plan: &LogicalPlan, empty_from: bool, planner_context: &mut PlannerContext, - mut exprs: Vec, - replace: ReplaceSelectItem, - ) -> Result> { - for expr in exprs.iter_mut() { - if let Expr::Column(Column { name, .. 
}) = expr { - if let Some(item) = replace - .items - .iter() - .find(|item| item.column_name.value == *name) - { - let new_expr = self.sql_select_to_rex( + options: WildcardAdditionalOptions, + ) -> Result { + let planned_option = WildcardOptions { + ilike: options.opt_ilike, + exclude: options.opt_exclude, + except: options.opt_except, + replace: None, + rename: options.opt_rename, + }; + if let Some(replace) = options.opt_replace { + let replace_expr = replace + .items + .iter() + .map(|item| { + Ok(self.sql_select_to_rex( SelectItem::UnnamedExpr(item.expr.clone()), plan, empty_from, planner_context, )?[0] - .clone(); - *expr = Expr::Alias(Alias { - expr: Box::new(new_expr), - relation: None, - name: name.clone(), - }); - } - } + .clone()) + }) + .collect::>>()?; + let planned_replace = PlannedReplaceSelectItem { + items: replace.items.into_iter().map(|i| *i).collect(), + planned_expressions: replace_expr, + }; + Ok(planned_option.with_replace(planned_replace)) + } else { + Ok(planned_option) } - Ok(exprs) } /// Wrap a plan in a projection @@ -715,7 +698,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = LogicalPlanBuilder::from(input.clone()) .aggregate(group_by_exprs.to_vec(), aggr_exprs.to_vec())? .build()?; - let group_by_exprs = if let LogicalPlan::Aggregate(agg) = &plan { &agg.group_expr } else { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index de130754ab1aa..39511ea4d03ac 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -21,11 +21,13 @@ use datafusion_expr::ScalarUDF; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ self, BinaryOperator, Expr as AstExpr, Function, FunctionArg, Ident, Interval, - TimezoneInfo, UnaryOperator, + ObjectName, TimezoneInfo, UnaryOperator, }; use std::sync::Arc; use std::{fmt::Display, vec}; +use super::dialect::{DateFieldExtractStyle, IntervalStyle}; +use super::Unparser; use arrow::datatypes::{Decimal128Type, Decimal256Type, DecimalType}; use arrow::util::display::array_value_to_string; use arrow_array::types::{ @@ -44,9 +46,6 @@ use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; -use super::dialect::{DateFieldExtractStyle, IntervalStyle}; -use super::Unparser; - /// DataFusion's Exprs can represent either an `Expr` or an `OrderByExpr` pub enum Unparsed { // SQL Expression @@ -159,7 +158,13 @@ impl Unparser<'_> { let args = args .iter() .map(|e| { - if matches!(e, Expr::Wildcard { qualifier: None }) { + if matches!( + e, + Expr::Wildcard { + qualifier: None, + .. + } + ) { Ok(FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { self.expr_to_sql_inner(e).map(|e| { @@ -477,8 +482,15 @@ impl Unparser<'_> { format: None, }) } - Expr::Wildcard { qualifier: _ } => { - not_impl_err!("Unsupported Expr conversion: {expr:?}") + // TODO: unparsing wildcard addition options + Expr::Wildcard { qualifier, .. } => { + if let Some(qualifier) = qualifier { + let idents: Vec = + qualifier.to_vec().into_iter().map(Ident::new).collect(); + Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + } else { + Ok(ast::Expr::Wildcard) + } } Expr::GroupingSet(grouping_set) => match grouping_set { GroupingSet::GroupingSets(grouping_sets) => { @@ -643,7 +655,13 @@ impl Unparser<'_> { fn function_args_to_sql(&self, args: &[Expr]) -> Result> { args.iter() .map(|e| { - if matches!(e, Expr::Wildcard { qualifier: None }) { + if matches!( + e, + Expr::Wildcard { + qualifier: None, + .. 
+ } + ) { Ok(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { self.expr_to_sql(e) @@ -1503,6 +1521,7 @@ mod tests { use arrow_schema::DataType::Int8; use ast::ObjectName; use datafusion_common::TableReference; + use datafusion_expr::expr::WildcardOptions; use datafusion_expr::{ case, col, cube, exists, grouping_set, interval_datetime_lit, interval_year_month_lit, lit, not, not_exists, out_ref_col, placeholder, rollup, @@ -1558,7 +1577,10 @@ mod tests { fn expr_to_sql_ok() -> Result<()> { let dummy_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let dummy_logical_plan = table_scan(Some("t"), &dummy_schema, None)? - .project(vec![Expr::Wildcard { qualifier: None }])? + .project(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }])? .filter(col("a").eq(lit(1)))? .build()?; @@ -1749,7 +1771,10 @@ mod tests { (sum(col("a")), r#"sum(a)"#), ( count_udaf() - .call(vec![Expr::Wildcard { qualifier: None }]) + .call(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }]) .distinct() .build() .unwrap(), @@ -1757,7 +1782,10 @@ mod tests { ), ( count_udaf() - .call(vec![Expr::Wildcard { qualifier: None }]) + .call(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }]) .filter(lit(true)) .build() .unwrap(), @@ -1833,11 +1861,11 @@ mod tests { (Expr::Negative(Box::new(col("a"))), r#"-a"#), ( exists(Arc::new(dummy_logical_plan.clone())), - r#"EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, + r#"EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( not_exists(Arc::new(dummy_logical_plan.clone())), - r#"NOT EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, + r#"NOT EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( try_cast(col("a"), DataType::Date64), diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 5a0317c47c85a..15efe2d2f03ce 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -58,7 +58,7 @@ mod common; fn test_schema_support() { quick_test( "SELECT * FROM s1.test", - "Projection: s1.test.t_date32, s1.test.t_date64\ + "Projection: *\ \n TableScan: s1.test", ); } @@ -517,7 +517,7 @@ fn plan_copy_to_query() { let plan = r#" CopyTo: format=csv output_url=output.csv options: () Limit: skip=0, fetch=10 - Projection: test_decimal.id, test_decimal.price + Projection: * TableScan: test_decimal "# .trim(); @@ -637,23 +637,13 @@ fn select_repeated_column() { ); } -#[test] -fn select_wildcard_with_repeated_column() { - let sql = "SELECT *, age FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - "Error during planning: Projections require unique expression names but the expression \"person.age\" at position 3 and \"person.age\" at position 8 have the same name. 
Consider aliasing (\"AS\") one of them.", - err.strip_backtrace() - ); -} - #[test] fn select_wildcard_with_repeated_column_but_is_aliased() { quick_test( - "SELECT *, first_name AS fn from person", - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀, person.first_name AS fn\ + "SELECT *, first_name AS fn from person", + "Projection: *, person.first_name AS fn\ \n TableScan: person", - ); + ); } #[test] @@ -870,7 +860,7 @@ fn where_selection_with_ambiguous_column() { #[test] fn natural_join() { let sql = "SELECT * FROM lineitem a NATURAL JOIN lineitem b"; - let expected = "Projection: a.l_item_id, a.l_description, a.price\ + let expected = "Projection: *\ \n Inner Join: Using a.l_item_id = b.l_item_id, a.l_description = b.l_description, a.price = b.price\ \n SubqueryAlias: a\ \n TableScan: lineitem\ @@ -906,7 +896,7 @@ fn natural_right_join() { #[test] fn natural_join_no_common_becomes_cross_join() { let sql = "SELECT * FROM person a NATURAL JOIN lineitem b"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, b.l_item_id, b.l_description, b.price\ + let expected = "Projection: *\ \n CrossJoin:\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -918,8 +908,7 @@ fn natural_join_no_common_becomes_cross_join() { #[test] fn using_join_multiple_keys() { let sql = "SELECT * FROM person a join person b using (id, age)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.first_name, b.last_name, b.state, b.salary, b.birth_date, b.😀\ + let expected = "Projection: *\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -933,8 +922,7 @@ fn using_join_multiple_keys_subquery() { let sql = "SELECT age FROM (SELECT * FROM person a join person b using (id, age, state))"; let expected = "Projection: a.age\ - \n Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.first_name, b.last_name, b.salary, b.birth_date, b.😀\ + \n Projection: *\ \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -946,8 +934,7 @@ fn using_join_multiple_keys_subquery() { #[test] fn using_join_multiple_keys_qualified_wildcard_select() { let sql = "SELECT a.* FROM person a join person b using (id, age)"; - let expected = - "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀\ + let expected = "Projection: a.*\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -959,8 +946,7 @@ fn using_join_multiple_keys_qualified_wildcard_select() { #[test] fn using_join_multiple_keys_select_all_columns() { let sql = "SELECT a.*, b.* FROM person a join person b using (id, age)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.id, b.first_name, b.last_name, b.age, b.state, b.salary, b.birth_date, b.😀\ + let expected = "Projection: a.*, b.*\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -972,9 +958,7 @@ fn using_join_multiple_keys_select_all_columns() { #[test] fn using_join_multiple_keys_multiple_joins() { let sql = "SELECT * FROM person a join person b using (id, age, state) join person c using (id, age, state)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - 
b.first_name, b.last_name, b.salary, b.birth_date, b.😀, \ - c.first_name, c.last_name, c.salary, c.birth_date, c.😀\ + let expected = "Projection: *\ \n Inner Join: Using a.id = c.id, a.age = c.age, a.state = c.state\ \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ \n SubqueryAlias: a\ @@ -1305,13 +1289,13 @@ fn select_binary_expr_nested() { fn select_wildcard_with_groupby() { quick_test( r#"SELECT * FROM person GROUP BY id, first_name, last_name, age, state, salary, birth_date, "😀""#, - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + "Projection: *\ \n Aggregate: groupBy=[[person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀]], aggr=[[]]\ \n TableScan: person", ); quick_test( "SELECT * FROM (SELECT first_name, last_name FROM person) AS a GROUP BY first_name, last_name", - "Projection: a.first_name, a.last_name\ + "Projection: *\ \n Aggregate: groupBy=[[a.first_name, a.last_name]], aggr=[[]]\ \n SubqueryAlias: a\ \n Projection: person.first_name, person.last_name\ @@ -1474,7 +1458,7 @@ fn recursive_ctes() { select * from numbers;"; quick_test( sql, - "Projection: numbers.n\ + "Projection: *\ \n SubqueryAlias: numbers\ \n RecursiveQuery: is_distinct=false\ \n Projection: Int64(1) AS n\ @@ -1687,10 +1671,10 @@ fn select_aggregate_with_non_column_inner_expression_with_groupby() { #[test] fn test_wildcard() { quick_test( - "SELECT * from person", - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + "SELECT * from person", + "Projection: *\ \n TableScan: person", - ); + ); } #[test] @@ -2118,7 +2102,7 @@ fn project_wildcard_on_join_with_using() { FROM lineitem \ JOIN lineitem as lineitem2 \ USING (l_item_id)"; - let expected = "Projection: lineitem.l_item_id, lineitem.l_description, lineitem.price, lineitem2.l_description, lineitem2.price\ + let expected = "Projection: *\ \n Inner Join: Using lineitem.l_item_id = lineitem2.l_item_id\ \n TableScan: lineitem\ \n SubqueryAlias: lineitem2\ @@ -3005,7 +2989,7 @@ fn exists_subquery_wildcard() { let expected = "Projection: p.id\ \n Filter: EXISTS ()\ \n Subquery:\ - \n Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + \n Projection: *\ \n Filter: person.last_name = outer_ref(p.last_name) AND person.state = outer_ref(p.state)\ \n TableScan: person\ \n SubqueryAlias: p\ @@ -3092,13 +3076,13 @@ fn subquery_references_cte() { cte AS (SELECT * FROM person) \ SELECT * FROM person WHERE EXISTS (SELECT * FROM cte WHERE id = person.id)"; - let expected = "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + let expected = "Projection: *\ \n Filter: EXISTS ()\ \n Subquery:\ - \n Projection: cte.id, cte.first_name, cte.last_name, cte.age, cte.state, cte.salary, cte.birth_date, cte.😀\ + \n Projection: *\ \n Filter: cte.id = outer_ref(person.id)\ \n SubqueryAlias: cte\ - \n Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + \n Projection: *\ \n TableScan: person\ \n TableScan: person"; @@ -3113,7 +3097,7 @@ fn cte_with_no_column_names() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: 
numbers\ \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ \n EmptyRelation"; @@ -3129,7 +3113,7 @@ fn cte_with_column_names() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: numbers\ \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ \n Projection: Int64(1), Int64(2), Int64(3)\ @@ -3147,7 +3131,7 @@ fn cte_with_column_aliases_precedence() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: numbers\ \n Projection: x AS a, y AS b, z AS c\ \n Projection: Int64(1) AS x, Int64(2) AS y, Int64(3) AS z\ @@ -3528,7 +3512,7 @@ fn test_select_all_inner_join() { INNER JOIN orders \ ON orders.customer_id * 2 = person.id + 10"; - let expected = "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀, orders.order_id, orders.customer_id, orders.o_item_id, orders.qty, orders.price, orders.delivered\ + let expected = "Projection: *\ \n Inner Join: Filter: orders.customer_id * Int64(2) = person.id + Int64(10)\ \n TableScan: person\ \n TableScan: orders"; @@ -4245,7 +4229,7 @@ fn test_prepare_statement_to_plan_value_list() { let sql = "PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter);"; let expected_plan = "Prepare: \"my_plan\" [Utf8, Utf8] \ - \n Projection: t.num, t.letter\ + \n Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ \n Values: (Int64(1), $1), (Int64(2), $2)"; @@ -4260,7 +4244,7 @@ fn test_prepare_statement_to_plan_value_list() { ScalarValue::from("a".to_string()), ScalarValue::from("b".to_string()), ]; - let expected_plan = "Projection: t.num, t.letter\ + let expected_plan = "Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ \n Values: (Int64(1), Utf8(\"a\")), (Int64(2), Utf8(\"b\"))"; @@ -4310,7 +4294,7 @@ fn test_table_alias() { (select age from person) t2 \ ) as f"; - let expected = "Projection: f.id, f.age\ + let expected = "Projection: *\ \n SubqueryAlias: f\ \n CrossJoin:\ \n SubqueryAlias: t1\ @@ -4327,7 +4311,7 @@ fn test_table_alias() { (select age from person) t2 \ ) as f (c1, c2)"; - let expected = "Projection: f.c1, f.c2\ + let expected = "Projection: *\ \n SubqueryAlias: f\ \n Projection: t1.id AS c1, t2.age AS c2\ \n CrossJoin:\ diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index eae4f428b4b42..1e8850efadff9 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -177,6 +177,7 @@ initial_logical_plan 01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c 02)--TableScan: simple_explain_test logical_plan after inline_table_scan SAME TEXT AS ABOVE +logical_plan after expand_wildcard_rule SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE logical_plan after count_wildcard_rule SAME TEXT AS ABOVE analyzed_logical_plan SAME TEXT AS ABOVE diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f217cbab074ff..49a18ca09de44 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1195,12 +1195,12 @@ LIMIT 5 200 2000 # Trying to exclude non-existing column should give error -statement error DataFusion error: Schema 
error: No field named e. Valid fields are table1.a, table1.b, table1.c, table1.d. +statement error SELECT * EXCLUDE e FROM table1 # similarly, except should raise error if excluded column is not in the table -statement error DataFusion error: Schema error: No field named e. Valid fields are table1.a, table1.b, table1.c, table1.d. +statement error SELECT * EXCEPT(e) FROM table1 @@ -1214,7 +1214,7 @@ FROM table1 2 20 20 200 2000 # EXCEPT, or EXCLUDE shouldn't contain duplicate column names -statement error DataFusion error: Error during planning: EXCLUDE or EXCEPT contains duplicate column names +statement error SELECT * EXCLUDE(a, a) FROM table1 diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 476ebe7ebebe1..ffbf54c4d93aa 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -637,8 +637,43 @@ SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 WHERE (t1.v2 IS NULL); ---- +statement ok +CREATE TABLE t3 ( + id INT +) as VALUES + (1), + (2), + (3) +; + +statement ok +CREATE TABLE t4 ( + id TEXT +) as VALUES + ('4'), + ('5'), + ('6') +; + +# test type coersion for wildcard expansion +query T rowsort +(SELECT * FROM t3 ) UNION ALL (SELECT * FROM t4) +---- +1 +2 +3 +4 +5 +6 + statement ok DROP TABLE t1; statement ok DROP TABLE t2; + +statement ok +DROP TABLE t3; + +statement ok +DROP TABLE t4; diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index dfc8826676174..ddf6a7aabffc3 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3929,7 +3929,8 @@ b 1 3 a 1 4 b 5 5 -statement error DataFusion error: Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c1 could not be resolved from available columns: rn +# Schema error: No field named aggregate_test_100.c1. Valid fields are rn. 
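The expectation changes above all follow from one behavioral shift: the SQL planner now leaves "*" unexpanded as "Projection: *", and the new expand_wildcard_rule analyzer pass shown in the explain.slt hunk runs before type coercion to expand it, which is also why the EXCLUDE/EXCEPT and window error expectations are loosened here. A minimal way to observe the new pass from Rust is sketched below; it assumes only the datafusion and tokio crates plus a throwaway table, and it is an illustration, not part of the patch.

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.sql("CREATE TABLE t(a INT, b TEXT) AS VALUES (1, 'x'), (2, 'y')")
        .await?;
    // EXPLAIN VERBOSE lists every analyzer/optimizer stage; with this change
    // the listing gains a "logical_plan after expand_wildcard_rule" entry,
    // and for a SELECT * query the initial plan should keep the bare "*".
    ctx.sql("EXPLAIN VERBOSE SELECT * FROM t")
        .await?
        .show()
        .await?;
    Ok(())
}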
+statement error SELECT * FROM (SELECT c1, c2, ROW_NUMBER() OVER(PARTITION BY c1) as rn FROM aggregate_test_100 From 69c99a7e78b0d9e2ac3881200a58f184c0023d15 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Wed, 14 Aug 2024 03:33:40 +0800 Subject: [PATCH 290/357] Add native stringview support for RIGHT (#11955) * add stringview support for RIGHT * add tests of stringview support for RIGHT * combine functions by ArrayAccessor and ArrayIter * fix fmt * fix clippy * fix fmt --- datafusion/functions/src/unicode/right.rs | 48 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 16 ++++++- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index 20cbbe020ff13..9d542bb2c0065 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -19,17 +19,21 @@ use std::any::Any; use std::cmp::{max, Ordering}; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use crate::utils::{make_scalar_function, utf8_to_str_type}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::Result; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; -use crate::utils::{make_scalar_function, utf8_to_str_type}; - #[derive(Debug)] pub struct RightFunc { signature: Signature, @@ -46,7 +50,11 @@ impl RightFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(right::, vec![])(args), + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(right::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(right::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function right"), + other => exec_err!( + "Unsupported data type {other:?} for function right,\ + expected Utf8View, Utf8 or LargeUtf8." 
+ ), } } } @@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc { /// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters pub fn right(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; let n_array = as_int64_array(&args[1])?; + if args[0].data_type() == &DataType::Utf8View { + // string_view_right(args) + let string_array = as_string_view_array(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } else { + // string_right::(args) + let string_array = &as_generic_string_array::(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } +} - let result = string_array - .iter() +// Currently the return type can only be Utf8 or LargeUtf8, to reach fully support, we need +// to edit the `get_optimal_return_type` in utils.rs to make the udfs be able to return Utf8View +// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166 +fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array_iter: &mut ArrayIter, + n_array: &Int64Array, +) -> Result { + let result = string_array_iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 2381bd122bdda..0a9b73babb96d 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -896,16 +896,28 @@ logical_plan ## Ensure no casts for RIGHT -## TODO file ticket query TT EXPLAIN SELECT RIGHT(column1_utf8view, 3) as c2 FROM test; ---- logical_plan -01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2 +01)Projection: right(test.column1_utf8view, Int64(3)) AS c2 02)--TableScan: test projection=[column1_utf8view] +# Test outputs of RIGHT +query TTT +SELECT + RIGHT(column1_utf8view, 3) as c1, + RIGHT(column1_utf8view, 0) as c2, + RIGHT(column1_utf8view, -3) as c3 +FROM test; +---- +rew (empty) rew +eng (empty) ngpeng +ael (empty) hael +NULL NULL NULL + ## Ensure no casts for RPAD ## TODO file ticket query TT From 482ef4551a4828825da8deb29d222fa82e1cfaa9 Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Wed, 14 Aug 2024 08:22:56 +0100 Subject: [PATCH 291/357] register get_field by default (#11959) This makes sense in principle, as all other core udfs are register by default in the context. It also has a practical use, which is executing logical plans that have field access already de-sugared into get_field invocations. --- datafusion/functions/src/core/mod.rs | 1 + datafusion/sqllogictest/test_files/struct.slt | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 8c51213972843..062a4a104d54a 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -94,6 +94,7 @@ pub fn functions() -> Vec> { nvl2(), arrow_typeof(), named_struct(), + get_field(), coalesce(), ] } diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 5c66bca1e0c2f..f3ac6549ad066 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -72,6 +72,14 @@ select struct(a, b, c)['c1'] from values; 2.2 3.3 +# explicit invocation of get_field +query R +select get_field(struct(a, b, c), 'c1') from values; +---- +1.1 +2.2 +3.3 + # struct scalar function #1 query ? 
select struct(1, 3.14, 'e'); From 3c477bf31cd14d71d88433dbe1afb10c42f5bf71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Wed, 14 Aug 2024 13:54:56 +0300 Subject: [PATCH 292/357] Refactor `CoalesceBatches` to use an explicit state machine (#11966) * Update coalesce_batches.rs * Update coalesce_batches.rs * Update coalesce_batches.rs * Update coalesce_batches.rs * Update datafusion/physical-plan/src/coalesce_batches.rs Co-authored-by: Yongting You <2010youy01@gmail.com> * Update coalesce_batches.rs * Apply suggestions from code review --------- Co-authored-by: Yongting You <2010youy01@gmail.com> Co-authored-by: Mehmet Ozan Kabak --- .../physical-plan/src/coalesce_batches.rs | 272 ++++++++++-------- 1 file changed, 160 insertions(+), 112 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 13c10c535c086..5589027694fe4 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -20,24 +20,24 @@ use std::any::Any; use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll}; + +use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; +use crate::{ + DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, +}; use arrow::array::{AsArray, StringViewBuilder}; use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow_array::{Array, ArrayRef}; -use futures::stream::{Stream, StreamExt}; - use datafusion_common::Result; use datafusion_execution::TaskContext; -use crate::{ - DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, -}; - -use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; +use futures::ready; +use futures::stream::{Stream, StreamExt}; /// `CoalesceBatchesExec` combines small batches into larger batches for more /// efficient use of vectorized processing by later operators. @@ -202,8 +202,9 @@ impl ExecutionPlan for CoalesceBatchesExec { self.target_batch_size, self.fetch, ), - is_closed: false, baseline_metrics: BaselineMetrics::new(&self.metrics, partition), + // Start by pulling data + inner_state: CoalesceBatchesStreamState::Pull, })) } @@ -236,10 +237,11 @@ struct CoalesceBatchesStream { input: SendableRecordBatchStream, /// Buffer for combining batches coalescer: BatchCoalescer, - /// Whether the stream has finished returning all of its data or not - is_closed: bool, /// Execution metrics baseline_metrics: BaselineMetrics, + /// The current inner state of the stream. This state dictates the current + /// action or operation to be performed in the streaming process. + inner_state: CoalesceBatchesStreamState, } impl Stream for CoalesceBatchesStream { @@ -259,45 +261,98 @@ impl Stream for CoalesceBatchesStream { } } +/// Enumeration of possible states for `CoalesceBatchesStream`. +/// It represents different stages in the lifecycle of a stream of record batches. 
+/// +/// An example of state transition: +/// Notation: +/// `[3000]`: A batch with size 3000 +/// `{[2000], [3000]}`: `CoalesceBatchStream`'s internal buffer with 2 batches buffered +/// Input of `CoalesceBatchStream` will generate three batches `[2000], [3000], [4000]` +/// The coalescing procedure will go through the following steps with 4096 coalescing threshold: +/// 1. Read the first batch and get it buffered. +/// - initial state: `Pull` +/// - initial buffer: `{}` +/// - updated buffer: `{[2000]}` +/// - next state: `Pull` +/// 2. Read the second batch, the coalescing target is reached since 2000 + 3000 > 4096 +/// - initial state: `Pull` +/// - initial buffer: `{[2000]}` +/// - updated buffer: `{[2000], [3000]}` +/// - next state: `ReturnBuffer` +/// 4. Two batches in the batch get merged and consumed by the upstream operator. +/// - initial state: `ReturnBuffer` +/// - initial buffer: `{[2000], [3000]}` +/// - updated buffer: `{}` +/// - next state: `Pull` +/// 5. Read the third input batch. +/// - initial state: `Pull` +/// - initial buffer: `{}` +/// - updated buffer: `{[4000]}` +/// - next state: `Pull` +/// 5. The input is ended now. Jump to exhaustion state preparing the finalized data. +/// - initial state: `Pull` +/// - initial buffer: `{[4000]}` +/// - updated buffer: `{[4000]}` +/// - next state: `Exhausted` +#[derive(Debug, Clone, Eq, PartialEq)] +enum CoalesceBatchesStreamState { + /// State to pull a new batch from the input stream. + Pull, + /// State to return a buffered batch. + ReturnBuffer, + /// State indicating that the stream is exhausted. + Exhausted, +} + impl CoalesceBatchesStream { fn poll_next_inner( self: &mut Pin<&mut Self>, cx: &mut Context<'_>, ) -> Poll>> { - // Get a clone (uses same underlying atomic) as self gets borrowed below let cloned_time = self.baseline_metrics.elapsed_compute().clone(); - - if self.is_closed { - return Poll::Ready(None); - } loop { - let input_batch = self.input.poll_next_unpin(cx); - // records time on drop - let _timer = cloned_time.timer(); - match ready!(input_batch) { - Some(result) => { - let Ok(input_batch) = result else { - return Poll::Ready(Some(result)); // pass back error - }; - // Buffer the batch and either get more input if not enough - // rows yet or output - match self.coalescer.push_batch(input_batch) { - Ok(None) => continue, - res => { - if self.coalescer.limit_reached() { - self.is_closed = true; + match &self.inner_state { + CoalesceBatchesStreamState::Pull => { + // Attempt to pull the next batch from the input stream. + let input_batch = ready!(self.input.poll_next_unpin(cx)); + // Start timing the operation. The timer records time upon being dropped. + let _timer = cloned_time.timer(); + + match input_batch { + Some(Ok(batch)) => match self.coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + self.inner_state = CoalesceBatchesStreamState::Exhausted; } - return Poll::Ready(res.transpose()); + CoalescerState::TargetReached => { + self.inner_state = + CoalesceBatchesStreamState::ReturnBuffer; + } + }, + None => { + // End of input stream, but buffered batches might still be present. 
+ self.inner_state = CoalesceBatchesStreamState::Exhausted; } + other => return Poll::Ready(other), } } - None => { - self.is_closed = true; - // we have reached the end of the input stream but there could still - // be buffered batches - return match self.coalescer.finish() { - Ok(None) => Poll::Ready(None), - res => Poll::Ready(res.transpose()), + CoalesceBatchesStreamState::ReturnBuffer => { + // Combine buffered batches into one batch and return it. + let batch = self.coalescer.finish_batch()?; + // Set to pull state for the next iteration. + self.inner_state = CoalesceBatchesStreamState::Pull; + return Poll::Ready(Some(Ok(batch))); + } + CoalesceBatchesStreamState::Exhausted => { + // Handle the end of the input stream. + return if self.coalescer.buffer.is_empty() { + // If buffer is empty, return None indicating the stream is fully consumed. + Poll::Ready(None) + } else { + // If the buffer still contains batches, prepare to return them. + let batch = self.coalescer.finish_batch()?; + Poll::Ready(Some(Ok(batch))) }; } } @@ -364,90 +419,72 @@ impl BatchCoalescer { Arc::clone(&self.schema) } - /// Add a batch, returning a batch if the target batch size or limit is reached - fn push_batch(&mut self, batch: RecordBatch) -> Result> { - // discard empty batches - if batch.num_rows() == 0 { - return Ok(None); - } - - // past limit - if self.limit_reached() { - return Ok(None); - } - + /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns + /// a variant of [`CoalescerState`] indicating the final state of the buffer. + fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } - // Handle fetch limit: - if let Some(fetch) = self.fetch { - if self.total_rows + batch.num_rows() >= fetch { - // We have reached the fetch limit. + /// The function checks if the buffer can reach the specified limit after getting `batch`. + /// If it does, it slices the received batch as needed, updates the buffer with it, and + /// finally returns `true`. Otherwise; the function does nothing and returns `false`. 
+ fn limit_reached(&mut self, batch: &RecordBatch) -> bool { + match self.fetch { + Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // Limit is reached let remaining_rows = fetch - self.total_rows; debug_assert!(remaining_rows > 0); - self.total_rows = fetch; - // Trim the batch and add to buffered batches: + let batch = batch.slice(0, remaining_rows); self.buffered_rows += batch.num_rows(); + self.total_rows = fetch; self.buffer.push(batch); - // Combine buffered batches: - let batch = concat_batches(&self.schema, &self.buffer)?; - // Reset the buffer state and return final batch: - self.buffer.clear(); - self.buffered_rows = 0; - return Ok(Some(batch)); + true } + _ => false, } - self.total_rows += batch.num_rows(); - - // batch itself is already big enough and we have no buffered rows so - // return it directly - if batch.num_rows() >= self.target_batch_size && self.buffer.is_empty() { - return Ok(Some(batch)); - } - // add to the buffered batches - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - // check to see if we have enough batches yet - let batch = if self.buffered_rows >= self.target_batch_size { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - Some(batch) - } else { - None - }; - Ok(batch) } - /// Finish the coalescing process, returning all buffered data as a final, - /// single batch, if any - fn finish(&mut self) -> Result> { - if self.buffer.is_empty() { - Ok(None) + /// Updates the buffer with the given batch. If the target batch size is reached, + /// the function returns `true`. Otherwise, it returns `false`. + fn target_reached(&mut self, batch: RecordBatch) -> bool { + if batch.num_rows() == 0 { + false } else { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - Ok(Some(batch)) + self.total_rows += batch.num_rows(); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + self.buffered_rows >= self.target_batch_size } } - /// returns true if there is a limit and it has been reached - pub fn limit_reached(&self) -> bool { - if let Some(fetch) = self.fetch { - self.total_rows >= fetch - } else { - false - } + /// Concatenates and returns all buffered batches, and clears the buffer. + fn finish_batch(&mut self) -> Result { + let batch = concat_batches(&self.schema, &self.buffer)?; + self.buffer.clear(); + self.buffered_rows = 0; + Ok(batch) } } +/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a +/// [`BatchCoalescer::push_batch()`] operation. +enum CoalescerState { + /// Neither the limit nor the target batch size is reached. + Continue, + /// The sufficient row count to produce a complete query result is reached. + LimitReached, + /// The specified minimum number of rows a batch should have is reached. 
+ TargetReached, +} + /// Heuristically compact `StringViewArray`s to reduce memory usage, if needed /// /// This function decides when to consolidate the StringView into a new buffer @@ -521,11 +558,13 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { #[cfg(test)] mod tests { + use std::ops::Range; + use super::*; + use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::builder::ArrayBuilder; use arrow_array::{StringViewArray, UInt32Array}; - use std::ops::Range; #[test] fn test_coalesce() { @@ -670,16 +709,25 @@ mod tests { // create a single large input batch for output comparison let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); - let mut coalescer = BatchCoalescer::new(schema, target_batch_size, fetch); + let mut coalescer = + BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); let mut output_batches = vec![]; for batch in input_batches { - if let Some(batch) = coalescer.push_batch(batch).unwrap() { - output_batches.push(batch); + match coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + output_batches.push(coalescer.finish_batch().unwrap()); + break; + } + CoalescerState::TargetReached => { + coalescer.buffered_rows = 0; + output_batches.push(coalescer.finish_batch().unwrap()); + } } } - if let Some(batch) = coalescer.finish().unwrap() { - output_batches.push(batch); + if coalescer.buffered_rows != 0 { + output_batches.extend(coalescer.buffer); } // make sure we got the expected number of output batches and content From 10fe3c165862e1157224ef2fafd6917782fb255f Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Wed, 14 Aug 2024 04:16:57 -0700 Subject: [PATCH 293/357] Implement native support StringView for find in set (#11970) * Implement native support StringView for find in set Signed-off-by: Chojan Shang * Add more tests Signed-off-by: Chojan Shang * Minor update --------- Signed-off-by: Chojan Shang --- .../functions/src/unicode/find_in_set.rs | 67 ++++++++++++------- .../sqllogictest/test_files/functions.slt | 19 +++++- .../sqllogictest/test_files/string_view.slt | 14 ++-- 3 files changed, 69 insertions(+), 31 deletions(-) diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 7c864bc191d71..41a2b9d9e72de 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -19,11 +19,11 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - ArrayRef, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, + PrimitiveArray, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -46,7 +46,11 @@ impl FindInSetFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8])], + vec![ + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], Volatility::Immutable, ), } @@ -71,41 +75,52 @@ impl ScalarUDFImpl for FindInSetFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(find_in_set::, vec![])(args) - } - DataType::LargeUtf8 => { 
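The Pull / ReturnBuffer / Exhausted loop introduced in the coalesce_batches.rs hunks above can be modeled without any Arrow or DataFusion types. The following standalone sketch mirrors the same control flow under simplifying assumptions (batches are plain Vec<u32>, no fetch limit, no metrics); it is an illustration, not code from the patch.

enum State {
    Pull,
    ReturnBuffer,
    Exhausted,
}

// Coalesce input batches so emitted batches hold at least `target` rows,
// following the transitions of CoalesceBatchesStreamState described above.
fn coalesce(mut input: Vec<Vec<u32>>, target: usize) -> Vec<Vec<u32>> {
    input.reverse(); // so pop() yields batches in their original order
    let mut buffer: Vec<Vec<u32>> = Vec::new();
    let mut buffered = 0usize;
    let mut out = Vec::new();
    let mut state = State::Pull;
    loop {
        match state {
            State::Pull => match input.pop() {
                Some(batch) => {
                    buffered += batch.len();
                    buffer.push(batch);
                    if buffered >= target {
                        state = State::ReturnBuffer; // target reached
                    }
                }
                None => state = State::Exhausted, // input ended
            },
            State::ReturnBuffer => {
                out.push(buffer.concat()); // stand-in for concat_batches
                buffer.clear();
                buffered = 0;
                state = State::Pull;
            }
            State::Exhausted => {
                if !buffer.is_empty() {
                    out.push(buffer.concat()); // flush whatever remains
                }
                return out;
            }
        }
    }
}

fn main() {
    // [2000] and [3000] cross the 4096 threshold and are emitted together;
    // the trailing [4000] is flushed from the Exhausted state.
    let sizes: Vec<usize> =
        coalesce(vec![vec![0; 2000], vec![0; 3000], vec![0; 4000]], 4096)
            .iter()
            .map(Vec::len)
            .collect();
    assert_eq!(sizes, vec![5000, 4000]);
}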
- make_scalar_function(find_in_set::, vec![])(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function find_in_set") - } - } + make_scalar_function(find_in_set, vec![])(args) } } ///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings ///A string list is a string composed of substrings separated by , characters. -pub fn find_in_set(args: &[ArrayRef]) -> Result -where - T::Native: OffsetSizeTrait, -{ +fn find_in_set(args: &[ArrayRef]) -> Result { if args.len() != 2 { return exec_err!( "find_in_set was called with {} arguments. It requires 2.", args.len() ); } + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + let str_list_array = args[1].as_string::(); + find_in_set_general::(string_array, str_list_array) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + let str_list_array = args[1].as_string::(); + find_in_set_general::(string_array, str_list_array) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + let str_list_array = args[1].as_string_view(); + find_in_set_general::(string_array, str_list_array) + } + other => { + exec_err!("Unsupported data type {other:?} for function find_in_set") + } + } +} - let str_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; - let str_list_array: &GenericStringArray = - as_generic_string_array::(&args[1])?; - - let result = str_array - .iter() - .zip(str_list_array.iter()) +pub fn find_in_set_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor>( + string_array: V, + str_list_array: V, +) -> Result +where + T::Native: OffsetSizeTrait, +{ + let string_iter = ArrayIter::new(string_array); + let str_list_iter = ArrayIter::new(str_list_array); + let result = string_iter + .zip(str_list_iter) .map(|(string, str_list)| match (string, str_list) { (Some(string), Some(str_list)) => { let mut res = 0; diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index bea3016a21d32..f728942b38c32 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -1092,7 +1092,7 @@ docs.apache.com docs com community.influxdata.com community com arrow.apache.org arrow org - +# find_in_set tests query I SELECT find_in_set('b', 'a,b,c,d') ---- @@ -1136,6 +1136,23 @@ SELECT find_in_set(NULL, NULL) ---- NULL +# find_in_set tests with utf8view +query I +SELECT find_in_set(arrow_cast('b', 'Utf8View'), 'a,b,c,d') +---- +2 + + +query I +SELECT find_in_set('a', arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +1 + +query I +SELECT find_in_set(arrow_cast('', 'Utf8View'), arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +0 + # Verify that multiple calls to volatile functions like `random()` are not combined / optimized away query B SELECT r FROM (SELECT r1 == r2 r, r1, r2 FROM (SELECT random()+1 r1, random()+1 r2) WHERE r1 > 0 AND r2 > 0) diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 0a9b73babb96d..264f85ff84b92 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -992,18 +992,24 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for FIND_IN_SET -## TODO file ticket query TT EXPLAIN SELECT FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c FROM test; ---- logical_plan -01)Projection: 
find_in_set(CAST(test.column1_utf8view AS Utf8), Utf8("a,b,c,d")) AS c +01)Projection: find_in_set(test.column1_utf8view, Utf8View("a,b,c,d")) AS c 02)--TableScan: test projection=[column1_utf8view] - - +query I +SELECT + FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c +FROM test; +---- +0 +0 +0 +NULL statement ok drop table test; From afa23abc46059e100061295b619b7b66fbc39625 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 14 Aug 2024 19:18:06 +0800 Subject: [PATCH 294/357] fix: move coercion of union from builder to `TypeCoercion` (#11961) * Improve type coercion for `UNION` * fix clippy * fix test * fix sqllogictests * fix EliminateNestedUnion tests * Move tests to slt * Move union_coercion to type_coercion.rs * fix tests * fix cargo doc * Improve error msg * As static member * Avoid clone * Fix clippy --- datafusion/expr/src/logical_plan/builder.rs | 112 +---------- .../optimizer/src/analyzer/type_coercion.rs | 147 ++++++++++++--- .../optimizer/src/eliminate_nested_union.rs | 12 +- datafusion/sql/tests/sql_integration.rs | 142 -------------- .../sqllogictest/test_files/type_coercion.slt | 176 ++++++++++++++++++ datafusion/sqllogictest/test_files/union.slt | 11 ++ 6 files changed, 320 insertions(+), 280 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e95fcdd128ede..2e53a682854ce 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,7 +20,6 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; -use std::iter::zip; use std::sync::Arc; use crate::dml::CopyTo; @@ -36,7 +35,7 @@ use crate::logical_plan::{ Projection, Repartition, Sort, SubqueryAlias, TableScan, Union, Unnest, Values, Window, }; -use crate::type_coercion::binary::{comparison_coercion, values_coercion}; +use crate::type_coercion::binary::values_coercion; use crate::utils::{ can_hash, columnize_expr, compare_sort_expr, expr_to_columns, find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, @@ -1338,96 +1337,14 @@ pub fn validate_unique_names<'a>( }) } -pub fn project_with_column_index( - expr: Vec, - input: Arc, - schema: DFSchemaRef, -) -> Result { - let alias_expr = expr - .into_iter() - .enumerate() - .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { - e.unalias().alias(schema.field(i).name()) - } - Expr::Column(Column { - relation: _, - ref name, - }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), - Expr::Alias { .. } | Expr::Column { .. } => e, - Expr::Wildcard { .. } => e, - _ => e.alias(schema.field(i).name()), - }) - .collect::>(); - - Projection::try_new_with_schema(alias_expr, input, schema) - .map(LogicalPlan::Projection) -} - /// Union two logical plans. pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { - let left_col_num = left_plan.schema().fields().len(); - - // check union plan length same. 
- let right_col_num = right_plan.schema().fields().len(); - if right_col_num != left_col_num { - return plan_err!( - "Union queries must have the same number of columns, (left is {left_col_num}, right is {right_col_num})"); - } - - // create union schema - let union_qualified_fields = - zip(left_plan.schema().iter(), right_plan.schema().iter()) - .map( - |((left_qualifier, left_field), (_right_qualifier, right_field))| { - let nullable = left_field.is_nullable() || right_field.is_nullable(); - let data_type = comparison_coercion( - left_field.data_type(), - right_field.data_type(), - ) - .ok_or_else(|| { - plan_datafusion_err!( - "UNION Column {} (type: {}) is not compatible with column {} (type: {})", - right_field.name(), - right_field.data_type(), - left_field.name(), - left_field.data_type() - ) - })?; - Ok(( - left_qualifier.cloned(), - Arc::new(Field::new(left_field.name(), data_type, nullable)), - )) - }, - ) - .collect::>>()?; - let union_schema = - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; - - let inputs = vec![left_plan, right_plan] - .into_iter() - .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union_schema)?; - match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - Ok(Arc::new(project_with_column_index( - expr, - input, - Arc::new(union_schema.clone()), - )?)) - } - other_plan => Ok(Arc::new(other_plan)), - } - }) - .collect::>>()?; - - if inputs.is_empty() { - return plan_err!("Empty UNION"); - } - + // Temporarily use the schema from the left input and later rely on the analyzer to + // coerce the two schemas into a common one. + let schema = Arc::clone(left_plan.schema()); Ok(LogicalPlan::Union(Union { - inputs, - schema: Arc::new(union_schema), + inputs: vec![Arc::new(left_plan), Arc::new(right_plan)], + schema, })) } @@ -1850,23 +1767,6 @@ mod tests { Ok(()) } - #[test] - fn plan_builder_union_different_num_columns_error() -> Result<()> { - let plan1 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3]))?; - let plan2 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3, 4]))?; - - let expected = "Error during planning: Union queries must have the same number of columns, (left is 1, right is 2)"; - let err_msg1 = plan1.clone().union(plan2.clone().build()?).unwrap_err(); - let err_msg2 = plan1.union_distinct(plan2.build()?).unwrap_err(); - - assert_eq!(err_msg1.strip_backtrace(), expected); - assert_eq!(err_msg2.strip_backtrace(), expected); - - Ok(()) - } - #[test] fn plan_builder_simple_distinct() -> Result<()> { let plan = diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 7392028ba7aba..40efbba6de7a5 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -17,22 +17,24 @@ //! 
Optimizer rule for type validation and coercion +use std::collections::HashMap; use std::sync::Arc; -use arrow::datatypes::{DataType, IntervalUnit}; +use itertools::izip; + +use arrow::datatypes::{DataType, Field, IntervalUnit}; use crate::analyzer::AnalyzerRule; use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, - DataFusionError, Result, ScalarValue, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::builder::project_with_column_index; use datafusion_expr::expr::{ - self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, - WindowFunction, + self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, + ScalarFunction, WindowFunction, }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; @@ -51,7 +53,7 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, Join, LogicalPlan, Operator, Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; @@ -121,9 +123,8 @@ fn analyze_internal( expr.rewrite(&mut expr_rewrite)? .map_data(|expr| original_name.restore(expr)) })? - // coerce join expressions specially - .map_data(|plan| expr_rewrite.coerce_joins(plan))? - .map_data(|plan| expr_rewrite.coerce_union(plan))? + // some plans need extra coercion after their expressions are coerced + .map_data(|plan| expr_rewrite.coerce_plan(plan))? // recompute the schema after the expressions have been rewritten as the types may have changed .map_data(|plan| plan.recompute_schema()) } @@ -137,6 +138,14 @@ impl<'a> TypeCoercionRewriter<'a> { Self { schema } } + fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { + match plan { + LogicalPlan::Join(join) => self.coerce_join(join), + LogicalPlan::Union(union) => Self::coerce_union(union), + _ => Ok(plan), + } + } + /// Coerce join equality expressions and join filter /// /// Joins must be treated specially as their equality expressions are stored @@ -145,11 +154,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_joins(&mut self, plan: LogicalPlan) -> Result { - let LogicalPlan::Join(mut join) = plan else { - return Ok(plan); - }; - + fn coerce_join(&mut self, mut join: Join) -> Result { join.on = join .on .into_iter() @@ -170,36 +175,30 @@ impl<'a> TypeCoercionRewriter<'a> { Ok(LogicalPlan::Join(join)) } - /// Corece the union inputs after expanding the wildcard expressions - /// - /// Union inputs must have the same schema, so we coerce the expressions to match the schema - /// after expanding the wildcard expressions - fn coerce_union(&self, plan: LogicalPlan) -> Result { - let LogicalPlan::Union(union) = plan else { - return Ok(plan); - }; - - let inputs = union + /// Coerce the union’s inputs to a common schema compatible with all inputs. 
+ /// This occurs after wildcard expansion and the coercion of the input expressions. + fn coerce_union(union_plan: Union) -> Result { + let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); + let new_inputs = union_plan .inputs - .into_iter() + .iter() .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union.schema)?; + let plan = coerce_plan_expr_for_schema(p, &union_schema)?; match plan { LogicalPlan::Projection(Projection { expr, input, .. }) => { Ok(Arc::new(project_with_column_index( expr, input, - Arc::clone(&union.schema), + Arc::clone(&union_schema), )?)) } other_plan => Ok(Arc::new(other_plan)), } }) .collect::>>()?; - Ok(LogicalPlan::Union(Union { - inputs, - schema: Arc::clone(&union.schema), + inputs: new_inputs, + schema: union_schema, })) } @@ -809,6 +808,92 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { Ok(Case::new(case_expr, when_then, else_expr)) } +/// Get a common schema that is compatible with all inputs of UNION. +fn coerce_union_schema(inputs: &[Arc]) -> Result { + let base_schema = inputs[0].schema(); + let mut union_datatypes = base_schema + .fields() + .iter() + .map(|f| f.data_type().clone()) + .collect::>(); + let mut union_nullabilities = base_schema + .fields() + .iter() + .map(|f| f.is_nullable()) + .collect::>(); + + for (i, plan) in inputs.iter().enumerate().skip(1) { + let plan_schema = plan.schema(); + if plan_schema.fields().len() != base_schema.fields().len() { + return plan_err!( + "Union schemas have different number of fields: \ + query 1 has {} fields whereas query {} has {} fields", + base_schema.fields().len(), + i + 1, + plan_schema.fields().len() + ); + } + // coerce data type and nullablity for each field + for (union_datatype, union_nullable, plan_field) in izip!( + union_datatypes.iter_mut(), + union_nullabilities.iter_mut(), + plan_schema.fields() + ) { + let coerced_type = + comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else( + || { + plan_datafusion_err!( + "Incompatible inputs for Union: Previous inputs were \ + of type {}, but got incompatible type {} on column '{}'", + union_datatype, + plan_field.data_type(), + plan_field.name() + ) + }, + )?; + *union_datatype = coerced_type; + *union_nullable = *union_nullable || plan_field.is_nullable(); + } + } + let union_qualified_fields = izip!( + base_schema.iter(), + union_datatypes.into_iter(), + union_nullabilities + ) + .map(|((qualifier, field), datatype, nullable)| { + let field = Arc::new(Field::new(field.name().clone(), datatype, nullable)); + (qualifier.cloned(), field) + }) + .collect::>(); + DFSchema::new_with_metadata(union_qualified_fields, HashMap::new()) +} + +/// See `` +fn project_with_column_index( + expr: Vec, + input: Arc, + schema: DFSchemaRef, +) -> Result { + let alias_expr = expr + .into_iter() + .enumerate() + .map(|(i, e)| match e { + Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { + e.unalias().alias(schema.field(i).name()) + } + Expr::Column(Column { + relation: _, + ref name, + }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), + Expr::Alias { .. } | Expr::Column { .. 
} => e, + _ => e.alias(schema.field(i).name()), + }) + .collect::>(); + + Projection::try_new_with_schema(alias_expr, input, schema) + .map(LogicalPlan::Projection) +} + #[cfg(test)] mod test { use std::any::Any; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index cc8cf1f56c184..5f41e4f137b15 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -114,8 +114,11 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; + use crate::analyzer::type_coercion::TypeCoercion; + use crate::analyzer::Analyzer; use crate::test::*; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; fn schema() -> Schema { @@ -127,7 +130,14 @@ mod tests { } fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { - assert_optimized_plan_eq(Arc::new(EliminateNestedUnion::new()), plan, expected) + let options = ConfigOptions::default(); + let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]) + .execute_and_check(plan, &options, |_, _| {})?; + assert_optimized_plan_eq( + Arc::new(EliminateNestedUnion::new()), + analyzed_plan, + expected, + ) } #[test] diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 15efe2d2f03ce..7ce3565fa29f6 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2160,148 +2160,6 @@ fn union_all() { quick_test(sql, expected); } -#[test] -fn union_with_different_column_names() { - let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders"; - let expected = "Union\ - \n Projection: orders.order_id\ - \n TableScan: orders\ - \n Projection: orders.customer_id AS order_id\ - \n TableScan: orders"; - quick_test(sql, expected); -} - -#[test] -fn union_values_with_no_alias() { - let sql = "SELECT 1, 2 UNION ALL SELECT 3, 4"; - let expected = "Union\ - \n Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2)\ - \n EmptyRelation\ - \n Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2)\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_type() { - let sql = "SELECT interval '1 year 1 day' UNION ALL SELECT 1"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column Int64(1) (type: Int64) is not compatible with column IntervalMonthDayNano(\"IntervalMonthDayNano { months: 12, days: 1, nanoseconds: 0 }\") (type: Interval(MonthDayNano))", - err - ); -} - -#[test] -fn union_with_different_decimal_data_types() { - let sql = "SELECT 1 a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(Int64(1) AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_null() { - let sql = "SELECT NULL a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(NULL AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_float_and_string() { - let sql = "SELECT 'a' a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a\ - \n EmptyRelation"; 
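With the coercion moved out of LogicalPlanBuilder::union, the builder only records the two inputs (temporarily reusing the left schema) and coerce_union_schema in the TypeCoercion analyzer later computes the common schema, which is why the unit tests removed here reappear as type_coercion.slt cases further down. Below is a small end-to-end sketch of the new behavior, assuming the datafusion and tokio crates; note the incompatibility error now surfaces when the plan is analyzed rather than when it is built.

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Int64 unioned with Float64: the analyzer unifies both sides to Float64.
    ctx.sql("SELECT 1 AS a UNION ALL SELECT 1.1 AS a")
        .await?
        .show()
        .await?;

    // Incompatible inputs are rejected during analysis with the new
    // "Incompatible inputs for Union" message instead of the old
    // "UNION Column ... is not compatible" planner error.
    let err = ctx
        .sql("SELECT interval '1 year' UNION ALL SELECT 1")
        .await?
        .show()
        .await
        .unwrap_err();
    println!("{err}");
    Ok(())
}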
- quick_test(sql, expected); -} - -#[test] -fn union_with_multiply_cols() { - let sql = "SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a, CAST(Int64(1) AS Float64) AS b\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn sorted_union_with_different_types_and_group_by() { - let sql = "SELECT a FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1"; - let expected = "Sort: x.a ASC NULLS LAST\ - \n Union\ - \n Projection: CAST(x.a AS Float64) AS a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_binary_expr_and_cast() { - let sql = "SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[CAST(Float64(0) + x.a AS Int32)]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: Float64(2.1) + x.a AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_aliases() { - let sql = "SELECT a as a1 FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(x.a AS Float64) AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_types() { - let sql = "SELECT 'a' a UNION ALL SELECT true a"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column a (type: Boolean) is not compatible with column a (type: Utf8)", - err - ); -} - #[test] fn empty_over() { let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index aa1e6826eca55..e420c0cc71554 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -49,3 +49,179 @@ select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; + + +#################################### +## Test type coercion with UNIONs ## +#################################### + +# Disable optimizer to test only the analyzer with type coercion +statement ok +set datafusion.optimizer.max_passes = 0; + +statement ok +set datafusion.explain.logical_plan_only = true; + +# Create test table +statement ok +CREATE TABLE orders( + order_id INT UNSIGNED 
NOT NULL, + customer_id INT UNSIGNED NOT NULL, + o_item_id VARCHAR NOT NULL, + qty INT NOT NULL, + price DOUBLE NOT NULL, + delivered BOOLEAN NOT NULL +); + +# union_different_num_columns_error() / UNION +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders + +# union_different_num_columns_error() / UNION ALL +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders + +# union_with_different_column_names() +query TT +EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders +---- +logical_plan +01)Union +02)--Projection: orders.order_id +03)----TableScan: orders +04)--Projection: orders.customer_id AS order_id +05)----TableScan: orders + +# union_values_with_no_alias() +query TT +EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4 +---- +logical_plan +01)Union +02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2) +03)----EmptyRelation +04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2) +05)----EmptyRelation + +# union_with_incompatible_data_type() +query error Incompatible inputs for Union: Previous inputs were of type Interval\(MonthDayNano\), but got incompatible type Int64 on column 'Int64\(1\)' +SELECT interval '1 year 1 day' UNION ALL SELECT 1 + +# union_with_different_decimal_data_types() +query TT +EXPLAIN SELECT 1 a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(Int64(1) AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_null() +query TT +EXPLAIN SELECT NULL a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(NULL AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_float_and_string() +query TT +EXPLAIN SELECT 'a' a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a +05)----EmptyRelation + +# union_with_multiply_cols() +query TT +EXPLAIN SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a, CAST(Int64(1) AS Float64) AS b +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b +05)----EmptyRelation + +# sorted_union_with_different_types_and_group_by() +query TT +EXPLAIN SELECT a FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1 +---- +logical_plan +01)Sort: x.a ASC NULLS LAST +02)--Union +03)----Projection: CAST(x.a AS Float64) AS a +04)------Aggregate: groupBy=[[x.a]], aggr=[[]] +05)--------SubqueryAlias: x +06)----------Projection: Int64(1) AS a +07)------------EmptyRelation +08)----Projection: x.a +09)------Aggregate: groupBy=[[x.a]], aggr=[[]] +10)--------SubqueryAlias: x +11)----------Projection: Float64(1.1) AS a +12)------------EmptyRelation + +# union_with_binary_expr_and_cast() +query TT +EXPLAIN SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a +03)----Aggregate: groupBy=[[CAST(Float64(0) + CAST(x.a AS Float64) AS Int32)]], aggr=[[]] 
+04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: Float64(2.1) + x.a AS Float64(0) + x.a +08)----Aggregate: groupBy=[[Float64(2.1) + CAST(x.a AS Float64)]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Int64(1) AS a +11)----------EmptyRelation + +# union_with_aliases() +query TT +EXPLAIN SELECT a as a1 FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(x.a AS Float64) AS a1 +03)----Aggregate: groupBy=[[x.a]], aggr=[[]] +04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: x.a AS a1 +08)----Aggregate: groupBy=[[x.a]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Float64(1.1) AS a +11)----------EmptyRelation + +# union_with_incompatible_data_types() +query error Incompatible inputs for Union: Previous inputs were of type Utf8, but got incompatible type Boolean on column 'a' +SELECT 'a' a UNION ALL SELECT true a + +statement ok +SET datafusion.optimizer.max_passes = 3; + +statement ok +SET datafusion.explain.logical_plan_only = false; + +statement ok +DROP TABLE orders; + +######################################## +## Test type coercion with UNIONs end ## +######################################## diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index ffbf54c4d93aa..288f99d82c107 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -677,3 +677,14 @@ DROP TABLE t3; statement ok DROP TABLE t4; + +# Test issue: https://github.com/apache/datafusion/issues/11742 +query R rowsort +WITH + tt(v1) AS (VALUES (1::INT),(NULL::INT)) +SELECT NVL(v1, 0.5) FROM tt + UNION ALL +SELECT NULL WHERE FALSE; +---- +0.5 +1 From e4be013064943786c9915bbc79c18ee82106340a Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Wed, 14 Aug 2024 14:19:34 +0300 Subject: [PATCH 295/357] test: re-enable window function over parquet with forced collisions (#11939) * test: window function over parquet with forced collisions * trigger CI --- .../sqllogictest/test_files/parquet.slt | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index a7ca0384d3f43..34d4ed6ff284b 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -251,27 +251,25 @@ SELECT COUNT(*) FROM timestamp_with_tz; ---- 131072 -# FIXME(#TODO) fails with feature `force_hash_collisions` -# https://github.com/apache/datafusion/issues/11660 # Perform the query: -# query IPT -# SELECT -# count, -# LAG(timestamp, 1) OVER (ORDER BY timestamp), -# arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) -# FROM timestamp_with_tz -# LIMIT 10; -# ---- -# 0 NULL Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z 
Timestamp(Millisecond, Some("UTC")) +query IPT +SELECT + count, + LAG(timestamp, 1) OVER (ORDER BY timestamp), + arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) +FROM timestamp_with_tz +LIMIT 10; +---- +0 NULL Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) # Test config listing_table_ignore_subdirectory: From f98f8a92128451614d334db731c61682d134db6f Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:04:05 +0700 Subject: [PATCH 296/357] Implement native support StringView for `REPEAT` (#11962) * Implement native support StringView for REPEAT Signed-off-by: Tai Le Manh * cargo fmt --------- Signed-off-by: Tai Le Manh Co-authored-by: Andrew Lamb --- datafusion/functions/src/string/repeat.rs | 84 ++++++++++++++++--- .../sqllogictest/test_files/string_view.slt | 3 +- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 9d122f6101a74..a377dee06f41b 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -18,10 +18,12 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; @@ -45,7 +47,14 @@ impl RepeatFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Int64)`, it first tries coercing to `(Utf8View, Int64)`. + // If that fails, it proceeds to `(Utf8, Int64)`. + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -71,9 +80,10 @@ impl ScalarUDFImpl for RepeatFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { + DataType::Utf8View => make_scalar_function(repeat_utf8view, vec![])(args), DataType::Utf8 => make_scalar_function(repeat::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(repeat::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function repeat"), + other => exec_err!("Unsupported data type {other:?} for function repeat. 
Expected Utf8, Utf8View or LargeUtf8"), } } } @@ -87,18 +97,35 @@ fn repeat(args: &[ArrayRef]) -> Result { let result = string_array .iter() .zip(number_array.iter()) - .map(|(string, number)| match (string, number) { - (Some(string), Some(number)) if number >= 0 => { - Some(string.repeat(number as usize)) - } - (Some(_), Some(_)) => Some("".to_string()), - _ => None, - }) + .map(|(string, number)| repeat_common(string, number)) .collect::>(); Ok(Arc::new(result) as ArrayRef) } +fn repeat_utf8view(args: &[ArrayRef]) -> Result { + let string_view_array = as_string_view_array(&args[0])?; + let number_array = as_int64_array(&args[1])?; + + let result = string_view_array + .iter() + .zip(number_array.iter()) + .map(|(string, number)| repeat_common(string, number)) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} + +fn repeat_common(string: Option<&str>, number: Option) -> Option { + match (string, number) { + (Some(string), Some(number)) if number >= 0 => { + Some(string.repeat(number as usize)) + } + (Some(_), Some(_)) => Some("".to_string()), + _ => None, + } +} + #[cfg(test)] mod tests { use arrow::array::{Array, StringArray}; @@ -124,7 +151,6 @@ mod tests { Utf8, StringArray ); - test_function!( RepeatFunc::new(), &[ @@ -148,6 +174,40 @@ mod tests { StringArray ); + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("Pg")))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ], + Ok(Some("PgPgPgPg")), + &str, + Utf8, + StringArray + ); + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(None)), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ], + Ok(None), + &str, + Utf8, + StringArray + ); + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("Pg")))), + ColumnarValue::Scalar(ScalarValue::Int64(None)), + ], + Ok(None), + &str, + Utf8, + StringArray + ); + Ok(()) } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 264f85ff84b92..a84b0c7b45949 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -860,14 +860,13 @@ logical_plan ## Ensure no casts for REPEAT -## TODO file ticket query TT EXPLAIN SELECT REPEAT(column1_utf8view, 2) as c1 FROM test; ---- logical_plan -01)Projection: repeat(CAST(test.column1_utf8view AS Utf8), Int64(2)) AS c1 +01)Projection: repeat(test.column1_utf8view, Int64(2)) AS c1 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for REPLACE From 02bfefe7f68f8255caf3d20b5bc5e3ca7080dda6 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Wed, 14 Aug 2024 13:48:33 -0700 Subject: [PATCH 297/357] Update RPAD scalar function to support Utf8View (#11942) * Update RPAD scalar function to support Utf8View * adding more test coverage * optimize macro --- datafusion/functions/src/unicode/rpad.rs | 233 ++++++++++++------ .../sqllogictest/test_files/functions.slt | 28 +++ .../sqllogictest/test_files/string_view.slt | 22 +- 3 files changed, 203 insertions(+), 80 deletions(-) diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index fc6bf1ffe748b..4bcf102c8793d 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -20,7 +20,9 @@ use std::sync::Arc; use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; use 
arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use unicode_segmentation::UnicodeSegmentation; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -45,11 +47,17 @@ impl RPadFunc { Self { signature: Signature::one_of( vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8View, Int64, Utf8View]), + Exact(vec![Utf8View, Int64, Utf8]), + Exact(vec![Utf8View, Int64, LargeUtf8]), Exact(vec![Utf8, Int64]), - Exact(vec![LargeUtf8, Int64]), + Exact(vec![Utf8, Int64, Utf8View]), Exact(vec![Utf8, Int64, Utf8]), - Exact(vec![LargeUtf8, Int64, Utf8]), Exact(vec![Utf8, Int64, LargeUtf8]), + Exact(vec![LargeUtf8, Int64]), + Exact(vec![LargeUtf8, Int64, Utf8View]), + Exact(vec![LargeUtf8, Int64, Utf8]), Exact(vec![LargeUtf8, Int64, LargeUtf8]), ], Volatility::Immutable, @@ -76,97 +84,168 @@ impl ScalarUDFImpl for RPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(rpad::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(rpad::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function rpad"), + match args.len() { + 2 => match args[0].data_type() { + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(rpad::, vec![])(args) + } + DataType::LargeUtf8 => { + make_scalar_function(rpad::, vec![])(args) + } + other => exec_err!("Unsupported data type {other:?} for function rpad"), + }, + 3 => match (args[0].data_type(), args[2].data_type()) { + ( + DataType::Utf8 | DataType::Utf8View, + DataType::Utf8 | DataType::Utf8View, + ) => make_scalar_function(rpad::, vec![])(args), + (DataType::LargeUtf8, DataType::LargeUtf8) => { + make_scalar_function(rpad::, vec![])(args) + } + (DataType::LargeUtf8, DataType::Utf8View | DataType::Utf8) => { + make_scalar_function(rpad::, vec![])(args) + } + (DataType::Utf8View | DataType::Utf8, DataType::LargeUtf8) => { + make_scalar_function(rpad::, vec![])(args) + } + (first_type, last_type) => { + exec_err!("unsupported arguments type for rpad, first argument type is {}, last argument type is {}", first_type, last_type) + } + }, + number => { + exec_err!("unsupported arguments number {} for rpad", number) + } } } } -/// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. -/// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad(args: &[ArrayRef]) -> Result { - match args.len() { - 2 => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - - let result = string_array - .iter() - .zip(length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!( - "rpad requested length {length} too large" - ); - } - - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s = string.to_string(); - s.push_str(" ".repeat(length - graphemes.len()).as_str()); - Ok(Some(s)) - } - } +macro_rules! 
process_rpad { + // For the two-argument case + ($string_array:expr, $length_array:expr) => {{ + $string_array + .iter() + .zip($length_array.iter()) + .map(|(string, length)| match (string, length) { + (Some(string), Some(length)) => { + if length > i32::MAX as i64 { + return exec_err!("rpad requested length {} too large", length); } - _ => Ok(None), - }) - .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - 3 => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - let fill_array = as_generic_string_array::(&args[2])?; - - let result = string_array - .iter() - .zip(length_array.iter()) - .zip(fill_array.iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!( - "rpad requested length {length} too large" - ); - } - let length = if length < 0 { 0 } else { length as usize }; + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + Ok(Some("".to_string())) + } else { let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); - if length < graphemes.len() { Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) } else { let mut s = string.to_string(); - let mut char_vector = - Vec::::with_capacity(length - graphemes.len()); - for l in 0..length - graphemes.len() { - char_vector - .push(*fill_chars.get(l % fill_chars.len()).unwrap()); - } - s.push_str(char_vector.iter().collect::().as_str()); + s.push_str(" ".repeat(length - graphemes.len()).as_str()); Ok(Some(s)) } } - _ => Ok(None), - }) - .collect::>>()?; + } + _ => Ok(None), + }) + .collect::>>() + }}; + // For the three-argument case + ($string_array:expr, $length_array:expr, $fill_array:expr) => {{ + $string_array + .iter() + .zip($length_array.iter()) + .zip($fill_array.iter()) + .map(|((string, length), fill)| match (string, length, fill) { + (Some(string), Some(length), Some(fill)) => { + if length > i32::MAX as i64 { + return exec_err!("rpad requested length {} too large", length); + } + + let length = if length < 0 { 0 } else { length as usize }; + let graphemes = string.graphemes(true).collect::>(); + let fill_chars = fill.chars().collect::>(); + + if length < graphemes.len() { + Ok(Some(graphemes[..length].concat())) + } else if fill_chars.is_empty() { + Ok(Some(string.to_string())) + } else { + let mut s = string.to_string(); + let char_vector: Vec = (0..length - graphemes.len()) + .map(|l| fill_chars[l % fill_chars.len()]) + .collect(); + s.push_str(&char_vector.iter().collect::()); + Ok(Some(s)) + } + } + _ => Ok(None), + }) + .collect::>>() + }}; +} + +/// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. 
+/// rpad('hi', 5, 'xy') = 'hixyx' +pub fn rpad( + args: &[ArrayRef], +) -> Result { + match (args.len(), args[0].data_type()) { + (2, DataType::Utf8View) => { + let string_array = as_string_view_array(&args[0])?; + let length_array = as_int64_array(&args[1])?; + + let result = process_rpad!(string_array, length_array)?; + Ok(Arc::new(result) as ArrayRef) + } + (2, _) => { + let string_array = as_generic_string_array::(&args[0])?; + let length_array = as_int64_array(&args[1])?; + + let result = process_rpad!(string_array, length_array)?; Ok(Arc::new(result) as ArrayRef) } - other => exec_err!( - "rpad was called with {other} arguments. It requires at least 2 and at most 3." + (3, DataType::Utf8View) => { + let string_array = as_string_view_array(&args[0])?; + let length_array = as_int64_array(&args[1])?; + match args[2].data_type() { + DataType::Utf8View => { + let fill_array = as_string_view_array(&args[2])?; + let result = process_rpad!(string_array, length_array, fill_array)?; + Ok(Arc::new(result) as ArrayRef) + } + DataType::Utf8 | DataType::LargeUtf8 => { + let fill_array = as_generic_string_array::(&args[2])?; + let result = process_rpad!(string_array, length_array, fill_array)?; + Ok(Arc::new(result) as ArrayRef) + } + other_type => { + exec_err!("unsupported type for rpad's third operator: {}", other_type) + } + } + } + (3, _) => { + let string_array = as_generic_string_array::(&args[0])?; + let length_array = as_int64_array(&args[1])?; + match args[2].data_type() { + DataType::Utf8View => { + let fill_array = as_string_view_array(&args[2])?; + let result = process_rpad!(string_array, length_array, fill_array)?; + Ok(Arc::new(result) as ArrayRef) + } + DataType::Utf8 | DataType::LargeUtf8 => { + let fill_array = as_generic_string_array::(&args[2])?; + let result = process_rpad!(string_array, length_array, fill_array)?; + Ok(Arc::new(result) as ArrayRef) + } + other_type => { + exec_err!("unsupported type for rpad's third operator: {}", other_type) + } + } + } + (other, other_type) => exec_err!( + "rpad requires 2 or 3 arguments with corresponding types, but got {}. 
number of arguments with {}", + other, other_type ), } } diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index f728942b38c32..f2f37a59cc2a0 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -294,6 +294,7 @@ SELECT right(NULL, CAST(NULL AS INT)) ---- NULL + query T SELECT rpad('hi', -1, 'xy') ---- @@ -354,6 +355,33 @@ SELECT rpad('xyxhi', 3) ---- xyx +# test for rpad with largeutf8 and utf8View + +query T +SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy') +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, 'xy') +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8')) +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View')) +---- +hixyx + +query T +SELECT rpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy') +---- +NULL + query I SELECT strpos('abc', 'c') ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index a84b0c7b45949..8bc053234e8ca 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -926,10 +926,26 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: rpad(__common_expr_1, Int64(1)) AS c1, rpad(__common_expr_1, Int64(2), CAST(test.column2_utf8view AS Utf8)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: rpad(test.column1_utf8view, Int64(1)) AS c1, rpad(test.column1_utf8view, Int64(2), test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] + +query TT +EXPLAIN SELECT + RPAD(column1_utf8view, 12, column2_large_utf8) as c1 +FROM test; +---- +logical_plan +01)Projection: rpad(test.column1_utf8view, Int64(12), test.column2_large_utf8) AS c1 +02)--TableScan: test projection=[column2_large_utf8, column1_utf8view] +query TT +EXPLAIN SELECT + RPAD(column1_utf8view, 12, column2_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: rpad(test.column1_utf8view, Int64(12), test.column2_utf8view) AS c1 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for SPLIT_PART ## TODO file ticket From 94034bef9357b166d5593de90b00a35563eec3f8 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Wed, 14 Aug 2024 17:11:33 -0400 Subject: [PATCH 298/357] Improve lpad udf by using a GenericStringBuilder (#11987) * Update LPAD scalar function to support Utf8View * Lpad code improvements and benchmark. * Improved use of GenericStringBuilder. 
--- datafusion/functions/Cargo.toml | 5 + datafusion/functions/benches/pad.rs | 141 +++++++++++++++ datafusion/functions/src/unicode/lpad.rs | 213 +++++++++++------------ 3 files changed, 244 insertions(+), 115 deletions(-) create mode 100644 datafusion/functions/benches/pad.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 9675d03a01617..688563baecfae 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -146,3 +146,8 @@ required-features = ["string_expressions"] harness = false name = "upper" required-features = ["string_expressions"] + +[[bench]] +harness = false +name = "pad" +required-features = ["unicode_expressions"] diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs new file mode 100644 index 0000000000000..5ff1e2fb860d4 --- /dev/null +++ b/datafusion/functions/benches/pad.rs @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray}; +use arrow::datatypes::Int64Type; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::unicode::{lpad, rpad}; +use rand::distributions::{Distribution, Uniform}; +use rand::Rng; +use std::sync::Arc; + +struct Filter { + dist: Dist, +} + +impl Distribution for Filter +where + Dist: Distribution, +{ + fn sample(&self, rng: &mut R) -> T { + self.dist.sample(rng) + } +} + +pub fn create_primitive_array( + size: usize, + null_density: f32, + len: usize, +) -> PrimitiveArray +where + T: ArrowPrimitiveType, +{ + let dist = Filter { + dist: Uniform::new_inclusive::(0, len as i64), + }; + + let mut rng = rand::thread_rng(); + (0..size) + .map(|_| { + if rng.gen::() < null_density { + None + } else { + Some(rng.sample(&dist)) + } + }) + .collect() +} + +fn create_args( + size: usize, + str_len: usize, + use_string_view: bool, +) -> Vec { + let length_array = Arc::new(create_primitive_array::(size, 0.0, str_len)); + + if !use_string_view { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + let fill_array = Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&length_array) as ArrayRef), + ColumnarValue::Array(fill_array), + ] + } else { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + let fill_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + + vec![ + ColumnarValue::Array(string_array), + 
ColumnarValue::Array(Arc::clone(&length_array) as ArrayRef), + ColumnarValue::Array(fill_array), + ] + } +} + +fn criterion_benchmark(c: &mut Criterion) { + for size in [1024, 2048] { + let mut group = c.benchmark_group("lpad function"); + + let args = create_args::(size, 32, false); + group.bench_function(BenchmarkId::new("utf8 type", size), |b| { + b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + }); + + let args = create_args::(size, 32, false); + group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { + b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + }); + + let args = create_args::(size, 32, true); + group.bench_function(BenchmarkId::new("stringview type", size), |b| { + b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + }); + + group.finish(); + + let mut group = c.benchmark_group("rpad function"); + + let args = create_args::(size, 32, false); + group.bench_function(BenchmarkId::new("utf8 type", size), |b| { + b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + }); + + let args = create_args::(size, 32, false); + group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { + b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + }); + // + // let args = create_args::(size, 32, true); + // group.bench_function(BenchmarkId::new("stringview type", size), |b| { + // b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + // }); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index 5caa6acd67453..521cdc5d0ff03 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -16,11 +16,12 @@ // under the License. use std::any::Any; +use std::fmt::Write; use std::sync::Arc; use arrow::array::{ - Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, Int64Array, - OffsetSizeTrait, StringViewArray, + Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, + GenericStringBuilder, Int64Array, OffsetSizeTrait, StringViewArray, }; use arrow::datatypes::DataType; use unicode_segmentation::UnicodeSegmentation; @@ -87,14 +88,18 @@ impl ScalarUDFImpl for LPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(lpad, vec![])(args) + match args[0].data_type() { + Utf8 | Utf8View => make_scalar_function(lpad::, vec![])(args), + LargeUtf8 => make_scalar_function(lpad::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function lpad"), + } } } /// Extends the string to length 'length' by prepending the characters fill (a space by default). /// If the string is already longer than length then it is truncated (on the right). /// lpad('hi', 5, 'xy') = 'xyxhi' -pub fn lpad(args: &[ArrayRef]) -> Result { +pub fn lpad(args: &[ArrayRef]) -> Result { if args.len() <= 1 || args.len() > 3 { return exec_err!( "lpad was called with {} arguments. 
It requires at least 2 and at most 3.", @@ -104,49 +109,28 @@ pub fn lpad(args: &[ArrayRef]) -> Result { let length_array = as_int64_array(&args[1])?; - match args[0].data_type() { - Utf8 => match args.len() { - 2 => lpad_impl::<&GenericStringArray, &GenericStringArray, i32>( - args[0].as_string::(), - length_array, - None, - ), - 3 => lpad_with_replace::<&GenericStringArray, i32>( - args[0].as_string::(), - length_array, - &args[2], - ), - _ => unreachable!(), - }, - LargeUtf8 => match args.len() { - 2 => lpad_impl::<&GenericStringArray, &GenericStringArray, i64>( - args[0].as_string::(), - length_array, - None, - ), - 3 => lpad_with_replace::<&GenericStringArray, i64>( - args[0].as_string::(), - length_array, - &args[2], - ), - _ => unreachable!(), - }, - Utf8View => match args.len() { - 2 => lpad_impl::<&StringViewArray, &GenericStringArray, i32>( - args[0].as_string_view(), - length_array, - None, - ), - 3 => lpad_with_replace::<&StringViewArray, i32>( - args[0].as_string_view(), - length_array, - &args[2], - ), - _ => unreachable!(), - }, - other => { - exec_err!("Unsupported data type {other:?} for function lpad") - } + match (args.len(), args[0].data_type()) { + (2, Utf8View) => lpad_impl::<&StringViewArray, &GenericStringArray, T>( + args[0].as_string_view(), + length_array, + None, + ), + (2, Utf8 | LargeUtf8) => lpad_impl::< + &GenericStringArray, + &GenericStringArray, + T, + >(args[0].as_string::(), length_array, None), + (3, Utf8View) => lpad_with_replace::<&StringViewArray, T>( + args[0].as_string_view(), + length_array, + &args[2], + ), + (3, Utf8 | LargeUtf8) => lpad_with_replace::<&GenericStringArray, T>( + args[0].as_string::(), + length_array, + &args[2], + ), + (_, _) => unreachable!(), } } @@ -159,20 +143,20 @@ where V: StringArrayType<'a>, { match fill_array.data_type() { - Utf8 => lpad_impl::, T>( + Utf8View => lpad_impl::( string_array, length_array, - Some(fill_array.as_string::()), + Some(fill_array.as_string_view()), ), LargeUtf8 => lpad_impl::, T>( string_array, length_array, Some(fill_array.as_string::()), ), - Utf8View => lpad_impl::( + Utf8 => lpad_impl::, T>( string_array, length_array, - Some(fill_array.as_string_view()), + Some(fill_array.as_string::()), ), other => { exec_err!("Unsupported data type {other:?} for function lpad") @@ -190,87 +174,86 @@ where V2: StringArrayType<'a>, T: OffsetSizeTrait, { - if fill_array.is_none() { - let result = string_array - .iter() - .zip(length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!("lpad requested length {length} too large"); - } + let array = if fill_array.is_none() { + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s: String = " ".repeat(length - graphemes.len()); - s.push_str(string); - Ok(Some(s)) - } - } + for (string, length) in string_array.iter().zip(length_array.iter()) { + if let (Some(string), Some(length)) = (string, length) { + if length > i32::MAX as i64 { + return exec_err!("lpad requested length {length} too large"); } - _ => Ok(None), - }) - .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + builder.append_value(""); + 
continue; + } + + let graphemes = string.graphemes(true).collect::>(); + if length < graphemes.len() { + builder.append_value(graphemes[..length].concat()); + } else { + builder.write_str(" ".repeat(length - graphemes.len()).as_str())?; + builder.write_str(string)?; + builder.append_value(""); + } + } else { + builder.append_null(); + } + } + + builder.finish() } else { - let result = string_array + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); + + for ((string, length), fill) in string_array .iter() .zip(length_array.iter()) .zip(fill_array.unwrap().iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!("lpad requested length {length} too large"); - } + { + if let (Some(string), Some(length), Some(fill)) = (string, length, fill) { + if length > i32::MAX as i64 { + return exec_err!("lpad requested length {length} too large"); + } - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); - - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) - } else { - let mut s = string.to_string(); - let mut char_vector = - Vec::::with_capacity(length - graphemes.len()); - for l in 0..length - graphemes.len() { - char_vector - .push(*fill_chars.get(l % fill_chars.len()).unwrap()); - } - s.insert_str( - 0, - char_vector.iter().collect::().as_str(), - ); - Ok(Some(s)) - } + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + builder.append_value(""); + continue; + } + + let graphemes = string.graphemes(true).collect::>(); + let fill_chars = fill.chars().collect::>(); + + if length < graphemes.len() { + builder.append_value(graphemes[..length].concat()); + } else if fill_chars.is_empty() { + builder.append_value(string); + } else { + for l in 0..length - graphemes.len() { + let c = *fill_chars.get(l % fill_chars.len()).unwrap(); + builder.write_char(c)?; } + builder.write_str(string)?; + builder.append_value(""); } - _ => Ok(None), - }) - .collect::>>()?; + } else { + builder.append_null(); + } + } - Ok(Arc::new(result) as ArrayRef) - } + builder.finish() + }; + + Ok(Arc::new(array) as ArrayRef) } trait StringArrayType<'a>: ArrayAccessor + Sized { fn iter(&self) -> ArrayIter; } -impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { +impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { fn iter(&self) -> ArrayIter { - GenericStringArray::::iter(self) + GenericStringArray::::iter(self) } } impl<'a> StringArrayType<'a> for &'a StringViewArray { From a00922b33be59b48d13ca965404b4b00e35d32a9 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Wed, 14 Aug 2024 14:34:16 -0700 Subject: [PATCH 299/357] Implement native support StringView for substr_index (#11974) Signed-off-by: Chojan Shang --- .../functions/src/unicode/substrindex.rs | 83 ++++++++++++++----- .../sqllogictest/test_files/functions.slt | 59 +++++++++++++ .../sqllogictest/test_files/string_view.slt | 22 +++++ 3 files changed, 144 insertions(+), 20 deletions(-) diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index f8ecab9073c42..6591ee26403aa 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ 
b/datafusion/functions/src/unicode/substrindex.rs @@ -18,10 +18,12 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, OffsetSizeTrait, StringBuilder}; -use arrow::datatypes::DataType; +use arrow::array::{ + ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, + PrimitiveArray, StringBuilder, +}; +use arrow::datatypes::{DataType, Int32Type, Int64Type}; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -46,6 +48,7 @@ impl SubstrIndexFunc { Self { signature: Signature::one_of( vec![ + Exact(vec![Utf8View, Utf8View, Int64]), Exact(vec![Utf8, Utf8, Int64]), Exact(vec![LargeUtf8, LargeUtf8, Int64]), ], @@ -74,15 +77,7 @@ impl ScalarUDFImpl for SubstrIndexFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(substr_index::, vec![])(args), - DataType::LargeUtf8 => { - make_scalar_function(substr_index::, vec![])(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function substr_index") - } - } + make_scalar_function(substr_index, vec![])(args) } fn aliases(&self) -> &[String] { @@ -95,7 +90,7 @@ impl ScalarUDFImpl for SubstrIndexFunc { /// SUBSTRING_INDEX('www.apache.org', '.', 2) = www.apache /// SUBSTRING_INDEX('www.apache.org', '.', -2) = apache.org /// SUBSTRING_INDEX('www.apache.org', '.', -1) = org -pub fn substr_index(args: &[ArrayRef]) -> Result { +fn substr_index(args: &[ArrayRef]) -> Result { if args.len() != 3 { return exec_err!( "substr_index was called with {} arguments. It requires 3.", @@ -103,15 +98,63 @@ pub fn substr_index(args: &[ArrayRef]) -> Result { ); } - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; - let count_array = as_int64_array(&args[2])?; + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + let delimiter_array = args[1].as_string::(); + let count_array: &PrimitiveArray = args[2].as_primitive(); + substr_index_general::( + string_array, + delimiter_array, + count_array, + ) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + let delimiter_array = args[1].as_string::(); + let count_array: &PrimitiveArray = args[2].as_primitive(); + substr_index_general::( + string_array, + delimiter_array, + count_array, + ) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + let delimiter_array = args[1].as_string_view(); + let count_array: &PrimitiveArray = args[2].as_primitive(); + substr_index_general::( + string_array, + delimiter_array, + count_array, + ) + } + other => { + exec_err!("Unsupported data type {other:?} for function substr_index") + } + } +} +pub fn substr_index_general< + 'a, + T: ArrowPrimitiveType, + V: ArrayAccessor, + P: ArrayAccessor, +>( + string_array: V, + delimiter_array: V, + count_array: P, +) -> Result +where + T::Native: OffsetSizeTrait, +{ let mut builder = StringBuilder::new(); - string_array - .iter() - .zip(delimiter_array.iter()) - .zip(count_array.iter()) + let string_iter = ArrayIter::new(string_array); + let delimiter_array_iter = ArrayIter::new(delimiter_array); + let count_array_iter = ArrayIter::new(count_array); + string_iter + .zip(delimiter_array_iter) + .zip(count_array_iter) .for_each(|((string, delimiter), n)| match (string, delimiter, n) 
{ (Some(string), Some(delimiter), Some(n)) => { // In MySQL, these cases will return an empty string. diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index f2f37a59cc2a0..04ab0d76e65f7 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -1042,6 +1042,65 @@ arrow.apache.org 100 arrow.apache.org . 3 . . 100 . +query I +SELECT levenshtein(NULL, NULL) +---- +NULL + +# Test substring_index using '.' as delimiter with utf8view +query TIT +SELECT str, n, substring_index(arrow_cast(str, 'Utf8View'), '.', n) AS c FROM + (VALUES + ROW('arrow.apache.org'), + ROW('.'), + ROW('...'), + ROW(NULL) + ) AS strings(str), + (VALUES + ROW(1), + ROW(2), + ROW(3), + ROW(100), + ROW(-1), + ROW(-2), + ROW(-3), + ROW(-100) + ) AS occurrences(n) +ORDER BY str DESC, n; +---- +NULL -100 NULL +NULL -3 NULL +NULL -2 NULL +NULL -1 NULL +NULL 1 NULL +NULL 2 NULL +NULL 3 NULL +NULL 100 NULL +arrow.apache.org -100 arrow.apache.org +arrow.apache.org -3 arrow.apache.org +arrow.apache.org -2 apache.org +arrow.apache.org -1 org +arrow.apache.org 1 arrow +arrow.apache.org 2 arrow.apache +arrow.apache.org 3 arrow.apache.org +arrow.apache.org 100 arrow.apache.org +... -100 ... +... -3 .. +... -2 . +... -1 (empty) +... 1 (empty) +... 2 . +... 3 .. +... 100 ... +. -100 . +. -3 . +. -2 . +. -1 (empty) +. 1 (empty) +. 2 . +. 3 . +. 100 . + # Test substring_index using 'ac' as delimiter query TIT SELECT str, n, substring_index(str, 'ac', n) AS c FROM diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 8bc053234e8ca..37ca89cf2dc6b 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -984,6 +984,28 @@ logical_plan 02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 03)----TableScan: test projection=[column1_utf8view] +## Ensure no casts for SUBSTRINDEX +query TT +EXPLAIN SELECT + SUBSTR_INDEX(column1_utf8view, 'a', 1) as c, + SUBSTR_INDEX(column1_utf8view, 'a', 2) as c2 +FROM test; +---- +logical_plan +01)Projection: substr_index(test.column1_utf8view, Utf8View("a"), Int64(1)) AS c, substr_index(test.column1_utf8view, Utf8View("a"), Int64(2)) AS c2 +02)--TableScan: test projection=[column1_utf8view] + +query TT +SELECT + SUBSTR_INDEX(column1_utf8view, 'a', 1) as c, + SUBSTR_INDEX(column1_utf8view, 'a', 2) as c2 +FROM test; +---- +Andrew Andrew +Xi Xiangpeng +R Raph +NULL NULL + ## Ensure no casts on columns for STARTS_WITH query TT EXPLAIN SELECT From b9961c30014719818679e6bbce80ed4fa4b09f27 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Thu, 15 Aug 2024 05:40:53 +0800 Subject: [PATCH 300/357] Add native stringview support for LEFT (#11976) * add string view support for left * add tests for stringview support of LEFT --- datafusion/functions/src/unicode/left.rs | 43 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 23 ++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index 7d456f5f1e946..c49784948dd0d 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -19,10 +19,15 @@ use std::any::Any; use std::cmp::Ordering; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, 
ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::Result; use datafusion_expr::TypeSignature::Exact; @@ -46,7 +51,11 @@ impl LeftFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -72,9 +81,14 @@ impl ScalarUDFImpl for LeftFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(left::, vec![])(args), + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(left::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(left::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function left"), + other => exec_err!( + "Unsupported data type {other:?} for function left,\ + expected Utf8View, Utf8 or LargeUtf8." + ), } } } @@ -83,10 +97,23 @@ impl ScalarUDFImpl for LeftFunc { /// left('abcde', 2) = 'ab' /// The implementation uses UTF-8 code points as characters pub fn left(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; let n_array = as_int64_array(&args[1])?; - let result = string_array - .iter() + + if args[0].data_type() == &DataType::Utf8View { + let string_array = as_string_view_array(&args[0])?; + left_impl::(string_array, n_array) + } else { + let string_array = as_generic_string_array::(&args[0])?; + left_impl::(string_array, n_array) + } +} + +fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array: V, + n_array: &Int64Array, +) -> Result { + let iter = ArrayIter::new(string_array); + let result = iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 37ca89cf2dc6b..e1d4a96620f8f 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -917,6 +917,29 @@ eng (empty) ngpeng ael (empty) hael NULL NULL NULL +## Ensure no casts for LEFT +query TT +EXPLAIN SELECT + LEFT(column1_utf8view, 3) as c2 +FROM test; +---- +logical_plan +01)Projection: left(test.column1_utf8view, Int64(3)) AS c2 +02)--TableScan: test projection=[column1_utf8view] + +# Test outputs of LEFT +query TTT +SELECT + LEFT(column1_utf8view, 3) as c1, + LEFT(column1_utf8view, 0) as c2, + LEFT(column1_utf8view, -3) as c3 +FROM test; +---- +And (empty) And +Xia (empty) Xiangp +Rap (empty) Raph +NULL NULL NULL + ## Ensure no casts for RPAD ## TODO file ticket query TT From e24a5dd5a508ba605686206c853c914a2d0cc095 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 14 Aug 2024 19:59:28 -0400 Subject: [PATCH 301/357] Improve function documentation (#11996) --- datafusion/functions/src/core/mod.rs | 8 ++++++++ datafusion/functions/src/crypto/mod.rs | 1 + datafusion/functions/src/datetime/mod.rs | 2 +- datafusion/functions/src/encoding/mod.rs | 1 + datafusion/functions/src/math/mod.rs | 1 + datafusion/functions/src/regex/mod.rs | 2 +- datafusion/functions/src/string/mod.rs | 2 +- datafusion/functions/src/unicode/mod.rs 
| 2 +- 8 files changed, 15 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 062a4a104d54a..af340930eabce 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -86,6 +86,7 @@ pub mod expr_fn { } } +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![ nullif(), @@ -94,6 +95,13 @@ pub fn functions() -> Vec> { nvl2(), arrow_typeof(), named_struct(), + // Note: most users invoke `get_field` indirectly via field access + // syntax like `my_struct_col['field_name']`, which results in a call to + // `get_field(my_struct_col, "field_name")`. + // + // However, it is also exposed directly for use cases such as + // serializing / deserializing plans with the field access desugared to + // calls to `get_field` get_field(), coalesce(), ] diff --git a/datafusion/functions/src/crypto/mod.rs b/datafusion/functions/src/crypto/mod.rs index 497c1af62a722..46177fc22b601 100644 --- a/datafusion/functions/src/crypto/mod.rs +++ b/datafusion/functions/src/crypto/mod.rs @@ -62,6 +62,7 @@ pub mod expr_fn { )); } +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![digest(), md5(), sha224(), sha256(), sha384(), sha512()] } diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index a7e9827d6ca69..db4e365267dd2 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -272,7 +272,7 @@ pub mod expr_fn { } } -/// Return a list of all functions in this package +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![ current_date(), diff --git a/datafusion/functions/src/encoding/mod.rs b/datafusion/functions/src/encoding/mod.rs index 24e11e5d635f6..48171370ad585 100644 --- a/datafusion/functions/src/encoding/mod.rs +++ b/datafusion/functions/src/encoding/mod.rs @@ -37,6 +37,7 @@ pub mod expr_fn { )); } +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![encode(), decode()] } diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 1e41fff289a48..b221fb900cfa3 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -276,6 +276,7 @@ pub mod expr_fn { ); } +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![ abs(), diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs index 884db24d9ec85..4ac162290ddb7 100644 --- a/datafusion/functions/src/regex/mod.rs +++ b/datafusion/functions/src/regex/mod.rs @@ -65,7 +65,7 @@ pub mod expr_fn { } } -#[doc = r" Return a list of all functions in this package"] +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![regexp_match(), regexp_like(), regexp_replace()] } diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index 9a19151a85e26..622802f0142bc 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -167,7 +167,7 @@ pub mod expr_fn { } } -/// Return a list of all functions in this package +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![ ascii(), diff --git a/datafusion/functions/src/unicode/mod.rs b/datafusion/functions/src/unicode/mod.rs index 9e8c07cd36edb..40915bc9efde8 100644 --- 
a/datafusion/functions/src/unicode/mod.rs +++ b/datafusion/functions/src/unicode/mod.rs @@ -125,7 +125,7 @@ pub mod expr_fn { } } -/// Return a list of all functions in this package +/// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { vec![ character_length(), From ea2e7ab6885de734243dffc4642e2742206de5b9 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Thu, 15 Aug 2024 04:23:01 -0700 Subject: [PATCH 302/357] Implement native support StringView for overlay (#11968) * Implement native support StringView for overlay Signed-off-by: Chojan Shang * Re-write impl of overlay Signed-off-by: Chojan Shang * Minor update Signed-off-by: Chojan Shang * Add more tests Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang --- datafusion/functions/src/string/overlay.rs | 182 ++++++++++++------ .../sqllogictest/test_files/functions.slt | 27 ++- .../sqllogictest/test_files/string_view.slt | 11 +- 3 files changed, 153 insertions(+), 67 deletions(-) diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs index 772b041361290..e285bd85b197b 100644 --- a/datafusion/functions/src/string/overlay.rs +++ b/datafusion/functions/src/string/overlay.rs @@ -21,7 +21,9 @@ use std::sync::Arc; use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; @@ -46,8 +48,10 @@ impl OverlayFunc { Self { signature: Signature::one_of( vec![ + Exact(vec![Utf8View, Utf8View, Int64, Int64]), Exact(vec![Utf8, Utf8, Int64, Int64]), Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64]), + Exact(vec![Utf8View, Utf8View, Int64]), Exact(vec![Utf8, Utf8, Int64]), Exact(vec![LargeUtf8, LargeUtf8, Int64]), ], @@ -76,54 +80,107 @@ impl ScalarUDFImpl for OverlayFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(overlay::, vec![])(args), + DataType::Utf8View | DataType::Utf8 => { + make_scalar_function(overlay::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(overlay::, vec![])(args), other => exec_err!("Unsupported data type {other:?} for function overlay"), } } } +macro_rules! 
process_overlay { + // For the three-argument case + ($string_array:expr, $characters_array:expr, $pos_num:expr) => {{ + $string_array + .iter() + .zip($characters_array.iter()) + .zip($pos_num.iter()) + .map(|((string, characters), start_pos)| { + match (string, characters, start_pos) { + (Some(string), Some(characters), Some(start_pos)) => { + let string_len = string.chars().count(); + let characters_len = characters.chars().count(); + let replace_len = characters_len as i64; + let mut res = + String::with_capacity(string_len.max(characters_len)); + + //as sql replace index start from 1 while string index start from 0 + if start_pos > 1 && start_pos - 1 < string_len as i64 { + let start = (start_pos - 1) as usize; + res.push_str(&string[..start]); + } + res.push_str(characters); + // if start + replace_len - 1 >= string_length, just to string end + if start_pos + replace_len - 1 < string_len as i64 { + let end = (start_pos + replace_len - 1) as usize; + res.push_str(&string[end..]); + } + Ok(Some(res)) + } + _ => Ok(None), + } + }) + .collect::>>() + }}; + + // For the four-argument case + ($string_array:expr, $characters_array:expr, $pos_num:expr, $len_num:expr) => {{ + $string_array + .iter() + .zip($characters_array.iter()) + .zip($pos_num.iter()) + .zip($len_num.iter()) + .map(|(((string, characters), start_pos), len)| { + match (string, characters, start_pos, len) { + (Some(string), Some(characters), Some(start_pos), Some(len)) => { + let string_len = string.chars().count(); + let characters_len = characters.chars().count(); + let replace_len = len.min(string_len as i64); + let mut res = + String::with_capacity(string_len.max(characters_len)); + + //as sql replace index start from 1 while string index start from 0 + if start_pos > 1 && start_pos - 1 < string_len as i64 { + let start = (start_pos - 1) as usize; + res.push_str(&string[..start]); + } + res.push_str(characters); + // if start + replace_len - 1 >= string_length, just to string end + if start_pos + replace_len - 1 < string_len as i64 { + let end = (start_pos + replace_len - 1) as usize; + res.push_str(&string[end..]); + } + Ok(Some(res)) + } + _ => Ok(None), + } + }) + .collect::>>() + }}; +} + /// OVERLAY(string1 PLACING string2 FROM integer FOR integer2) /// Replaces a substring of string1 with string2 starting at the integer bit /// pgsql overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas /// overlay('Txxxxas' placing 'hom' from 2) -> Thomxas, without for option, str2's len is instead -pub fn overlay(args: &[ArrayRef]) -> Result { +fn overlay(args: &[ArrayRef]) -> Result { + let use_string_view = args[0].data_type() == &DataType::Utf8View; + if use_string_view { + string_view_overlay::(args) + } else { + string_overlay::(args) + } +} + +pub fn string_overlay(args: &[ArrayRef]) -> Result { match args.len() { 3 => { let string_array = as_generic_string_array::(&args[0])?; let characters_array = as_generic_string_array::(&args[1])?; let pos_num = as_int64_array(&args[2])?; - let result = string_array - .iter() - .zip(characters_array.iter()) - .zip(pos_num.iter()) - .map(|((string, characters), start_pos)| { - match (string, characters, start_pos) { - (Some(string), Some(characters), Some(start_pos)) => { - let string_len = string.chars().count(); - let characters_len = characters.chars().count(); - let replace_len = characters_len as i64; - let mut res = - String::with_capacity(string_len.max(characters_len)); - - //as sql replace index start from 1 while string index start from 0 - if start_pos > 1 && start_pos - 1 
< string_len as i64 { - let start = (start_pos - 1) as usize; - res.push_str(&string[..start]); - } - res.push_str(characters); - // if start + replace_len - 1 >= string_length, just to string end - if start_pos + replace_len - 1 < string_len as i64 { - let end = (start_pos + replace_len - 1) as usize; - res.push_str(&string[end..]); - } - Ok(Some(res)) - } - _ => Ok(None), - } - }) - .collect::>>()?; + let result = process_overlay!(string_array, characters_array, pos_num)?; Ok(Arc::new(result) as ArrayRef) } 4 => { @@ -132,37 +189,34 @@ pub fn overlay(args: &[ArrayRef]) -> Result { let pos_num = as_int64_array(&args[2])?; let len_num = as_int64_array(&args[3])?; - let result = string_array - .iter() - .zip(characters_array.iter()) - .zip(pos_num.iter()) - .zip(len_num.iter()) - .map(|(((string, characters), start_pos), len)| { - match (string, characters, start_pos, len) { - (Some(string), Some(characters), Some(start_pos), Some(len)) => { - let string_len = string.chars().count(); - let characters_len = characters.chars().count(); - let replace_len = len.min(string_len as i64); - let mut res = - String::with_capacity(string_len.max(characters_len)); - - //as sql replace index start from 1 while string index start from 0 - if start_pos > 1 && start_pos - 1 < string_len as i64 { - let start = (start_pos - 1) as usize; - res.push_str(&string[..start]); - } - res.push_str(characters); - // if start + replace_len - 1 >= string_length, just to string end - if start_pos + replace_len - 1 < string_len as i64 { - let end = (start_pos + replace_len - 1) as usize; - res.push_str(&string[end..]); - } - Ok(Some(res)) - } - _ => Ok(None), - } - }) - .collect::>>()?; + let result = + process_overlay!(string_array, characters_array, pos_num, len_num)?; + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!("overlay was called with {other} arguments. 
It requires 3 or 4.") + } + } +} + +pub fn string_view_overlay(args: &[ArrayRef]) -> Result { + match args.len() { + 3 => { + let string_array = as_string_view_array(&args[0])?; + let characters_array = as_string_view_array(&args[1])?; + let pos_num = as_int64_array(&args[2])?; + + let result = process_overlay!(string_array, characters_array, pos_num)?; + Ok(Arc::new(result) as ArrayRef) + } + 4 => { + let string_array = as_string_view_array(&args[0])?; + let characters_array = as_string_view_array(&args[1])?; + let pos_num = as_int64_array(&args[2])?; + let len_num = as_int64_array(&args[3])?; + + let result = + process_overlay!(string_array, characters_array, pos_num, len_num)?; Ok(Arc::new(result) as ArrayRef) } other => { diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 04ab0d76e65f7..9034e5baa4c66 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -925,7 +925,7 @@ SELECT products.* REPLACE (price*2 AS price, product_id+1000 AS product_id) FROM 1003 OldBrand Product 3 79.98 1004 OldBrand Product 4 99.98 -#overlay tests +# overlay tests statement ok CREATE TABLE over_test( str TEXT, @@ -967,6 +967,31 @@ NULL Thomxas NULL +# overlay tests with utf8view +query T +SELECT overlay(arrow_cast(str, 'Utf8View') placing arrow_cast(characters, 'Utf8View') from pos for len) from over_test +---- +abc +qwertyasdfg +ijkz +Thomas +NULL +NULL +NULL +NULL + +query T +SELECT overlay(arrow_cast(str, 'Utf8View') placing arrow_cast(characters, 'Utf8View') from pos) from over_test +---- +abc +qwertyasdfg +ijk +Thomxas +NULL +NULL +Thomxas +NULL + query I SELECT levenshtein('kitten', 'sitting') ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e1d4a96620f8f..b1ac84aba7e66 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -818,16 +818,23 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for OVERLAY -## TODO file ticket query TT EXPLAIN SELECT OVERLAY(column1_utf8view PLACING 'foo' FROM 2 ) as c1 FROM test; ---- logical_plan -01)Projection: overlay(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Int64(2)) AS c1 +01)Projection: overlay(test.column1_utf8view, Utf8View("foo"), Int64(2)) AS c1 02)--TableScan: test projection=[column1_utf8view] +query T +SELECT OVERLAY(column1_utf8view PLACING 'foo' FROM 2 ) as c1 FROM test; +---- +Afooew +Xfoogpeng +Rfooael +NULL + ## Ensure no casts for REGEXP_LIKE query TT EXPLAIN SELECT From cb3ec77f28a02728f7454ba73d05afbf12b81e67 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 15 Aug 2024 21:40:19 +0800 Subject: [PATCH 303/357] disable `with_create_default_catalog_and_schema` if the default catalog exists (#11991) --- .../core/src/execution/session_state.rs | 98 +++++++++++++++++-- 1 file changed, 90 insertions(+), 8 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 0a057d6f1417e..e9c876291845a 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -987,8 +987,24 @@ impl SessionStateBuilder { /// Returns a new [SessionStateBuilder] based on an existing [SessionState] /// The session id for the new builder will be unset; all other fields will - /// be cloned from what is set in the provided session state + /// be 
cloned from what is set in the provided session state. If the default + /// catalog exists in existing session state, the new session state will not + /// create default catalog and schema. pub fn new_from_existing(existing: SessionState) -> Self { + let default_catalog_exist = existing + .catalog_list() + .catalog(&existing.config.options().catalog.default_catalog) + .is_some(); + // The new `with_create_default_catalog_and_schema` should be false if the default catalog exists + let create_default_catalog_and_schema = existing + .config + .options() + .catalog + .create_default_catalog_and_schema + && !default_catalog_exist; + let new_config = existing + .config + .with_create_default_catalog_and_schema(create_default_catalog_and_schema); Self { session_id: None, analyzer: Some(existing.analyzer), @@ -1005,7 +1021,7 @@ impl SessionStateBuilder { window_functions: Some(existing.window_functions.into_values().collect_vec()), serializer_registry: Some(existing.serializer_registry), file_formats: Some(existing.file_formats.into_values().collect_vec()), - config: Some(existing.config), + config: Some(new_config), table_options: Some(existing.table_options), execution_props: Some(existing.execution_props), table_factories: Some(existing.table_factories), @@ -1801,17 +1817,19 @@ impl<'a> SimplifyInfo for SessionSimplifyProvider<'a> { #[cfg(test)] mod tests { - use std::collections::HashMap; - + use super::{SessionContextProvider, SessionStateBuilder}; + use crate::catalog_common::MemoryCatalogProviderList; + use crate::datasource::MemTable; + use crate::execution::context::SessionState; + use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::DFSchema; use datafusion_common::Result; + use datafusion_execution::config::SessionConfig; use datafusion_expr::Expr; use datafusion_sql::planner::{PlannerContext, SqlToRel}; - - use crate::execution::context::SessionState; - - use super::{SessionContextProvider, SessionStateBuilder}; + use std::collections::HashMap; + use std::sync::Arc; #[test] fn test_session_state_with_default_features() { @@ -1841,4 +1859,68 @@ mod tests { assert!(sql_to_expr(&state).is_err()) } + + #[test] + fn test_from_existing() -> Result<()> { + fn employee_batch() -> RecordBatch { + let name: ArrayRef = + Arc::new(StringArray::from_iter_values(["Andy", "Andrew"])); + let age: ArrayRef = Arc::new(Int32Array::from(vec![11, 22])); + RecordBatch::try_from_iter(vec![("name", name), ("age", age)]).unwrap() + } + let batch = employee_batch(); + let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?; + + let session_state = SessionStateBuilder::new() + .with_catalog_list(Arc::new(MemoryCatalogProviderList::new())) + .build(); + let table_ref = session_state.resolve_table_ref("employee").to_string(); + session_state + .schema_for_ref(&table_ref)? 
+ .register_table("employee".to_string(), Arc::new(table))?; + + let default_catalog = session_state + .config + .options() + .catalog + .default_catalog + .clone(); + let default_schema = session_state + .config + .options() + .catalog + .default_schema + .clone(); + let is_exist = session_state + .catalog_list() + .catalog(default_catalog.as_str()) + .unwrap() + .schema(default_schema.as_str()) + .unwrap() + .table_exist("employee"); + assert!(is_exist); + let new_state = SessionStateBuilder::new_from_existing(session_state).build(); + assert!(new_state + .catalog_list() + .catalog(default_catalog.as_str()) + .unwrap() + .schema(default_schema.as_str()) + .unwrap() + .table_exist("employee")); + + // if `with_create_default_catalog_and_schema` is disabled, the new one shouldn't create default catalog and schema + let disable_create_default = + SessionConfig::default().with_create_default_catalog_and_schema(false); + let without_default_state = SessionStateBuilder::new() + .with_config(disable_create_default) + .build(); + assert!(without_default_state + .catalog_list() + .catalog(&default_catalog) + .is_none()); + let new_state = + SessionStateBuilder::new_from_existing(without_default_state).build(); + assert!(new_state.catalog_list().catalog(&default_catalog).is_none()); + Ok(()) + } } From 4baa901cec5e8326c9392fc9da49416498ae942c Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 15 Aug 2024 09:57:00 -0700 Subject: [PATCH 304/357] Use tracked-consumers memory pool be the default. (#11949) * feat(11523): set the default memory pool to the tracked-consumer pool * test(11523): update tests for the OOM message including the top consumers * chore(11523): remove duplicate wording from OOM messages --- datafusion/core/tests/memory_limit/mod.rs | 35 +++++++------------ datafusion/execution/src/memory_pool/pool.rs | 16 ++++----- datafusion/execution/src/runtime_env.rs | 14 ++++++-- .../physical-plan/src/joins/cross_join.rs | 3 +- .../physical-plan/src/joins/hash_join.rs | 13 +++---- .../src/joins/nested_loop_join.rs | 3 +- 6 files changed, 38 insertions(+), 46 deletions(-) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 5c712af801922..e6a51eae13372 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -76,8 +76,7 @@ async fn group_by_none() { TestCase::new() .with_query("select median(request_bytes) from t") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "AggregateStream", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: AggregateStream" ]) .with_memory_limit(2_000) .run() @@ -89,8 +88,7 @@ async fn group_by_row_hash() { TestCase::new() .with_query("select count(*) from t GROUP BY response_bytes") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "GroupedHashAggregateStream", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: GroupedHashAggregateStream" ]) .with_memory_limit(2_000) .run() @@ -103,8 +101,7 @@ async fn group_by_hash() { // group by dict column .with_query("select count(*) from t GROUP BY service, host, pod, container") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "GroupedHashAggregateStream", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: GroupedHashAggregateStream" ]) .with_memory_limit(1_000) 
.run() @@ -117,8 +114,7 @@ async fn join_by_key_multiple_partitions() { TestCase::new() .with_query("select t1.* from t t1 JOIN t t2 ON t1.service = t2.service") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "HashJoinInput[0]", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: HashJoinInput[0]", ]) .with_memory_limit(1_000) .with_config(config) @@ -132,8 +128,7 @@ async fn join_by_key_single_partition() { TestCase::new() .with_query("select t1.* from t t1 JOIN t t2 ON t1.service = t2.service") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "HashJoinInput", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: HashJoinInput", ]) .with_memory_limit(1_000) .with_config(config) @@ -146,8 +141,7 @@ async fn join_by_expression() { TestCase::new() .with_query("select t1.* from t t1 JOIN t t2 ON t1.service != t2.service") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "NestedLoopJoinLoad[0]", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: NestedLoopJoinLoad[0]", ]) .with_memory_limit(1_000) .run() @@ -159,8 +153,7 @@ async fn cross_join() { TestCase::new() .with_query("select t1.* from t t1 CROSS JOIN t t2") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "CrossJoinExec", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: CrossJoinExec", ]) .with_memory_limit(1_000) .run() @@ -216,8 +209,7 @@ async fn symmetric_hash_join() { "select t1.* from t t1 JOIN t t2 ON t1.pod = t2.pod AND t1.time = t2.time", ) .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "SymmetricHashJoinStream", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: SymmetricHashJoinStream", ]) .with_memory_limit(1_000) .with_scenario(Scenario::AccessLogStreaming) @@ -235,8 +227,7 @@ async fn sort_preserving_merge() { // so only a merge is needed .with_query("select * from t ORDER BY a ASC NULLS LAST, b ASC NULLS LAST LIMIT 10") .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "SortPreservingMergeExec", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: SortPreservingMergeExec", ]) // provide insufficient memory to merge .with_memory_limit(partition_size / 2) @@ -313,8 +304,7 @@ async fn sort_spill_reservation() { test.clone() .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "ExternalSorterMerge", // merging in sort fails + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: ExternalSorterMerge", ]) .with_config(config) .run() @@ -343,8 +333,7 @@ async fn oom_recursive_cte() { SELECT * FROM nodes;", ) .with_expected_errors(vec![ - "Resources exhausted: Failed to allocate additional", - "RecursiveQuery", + "Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: RecursiveQuery", ]) .with_memory_limit(2_000) .run() @@ -396,7 +385,7 @@ async fn oom_with_tracked_consumer_pool() { .with_expected_errors(vec![ "Failed to allocate additional", "for ParquetSink(ArrowColumnWriter)", - "Resources exhausted with top memory consumers (across reservations) are: 
ParquetSink(ArrowColumnWriter)" + "Additional allocation failed with top memory consumers (across reservations) as: ParquetSink(ArrowColumnWriter)" ]) .with_memory_pool(Arc::new( TrackConsumersPool::new( diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index 4a41602bd961f..d3cd93979bafa 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -392,7 +392,7 @@ fn provide_top_memory_consumers_to_error_msg( error_msg: String, top_consumers: String, ) -> String { - format!("Resources exhausted with top memory consumers (across reservations) are: {}. Error: {}", top_consumers, error_msg) + format!("Additional allocation failed with top memory consumers (across reservations) as: {}. Error: {}", top_consumers, error_msg) } #[cfg(test)] @@ -501,7 +501,7 @@ mod tests { // Test: reports if new reservation causes error // using the previously set sizes for other consumers let mut r5 = MemoryConsumer::new("r5").register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 50 bytes, r3 consumed 20 bytes, r2 consumed 15 bytes. Error: Failed to allocate additional 150 bytes for r5 with 0 bytes already allocated for this reservation - 5 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: r1 consumed 50 bytes, r3 consumed 20 bytes, r2 consumed 15 bytes. Error: Failed to allocate additional 150 bytes for r5 with 0 bytes already allocated for this reservation - 5 bytes remain available for the total pool"; let res = r5.try_grow(150); assert!( matches!( @@ -524,7 +524,7 @@ mod tests { // Test: see error message when no consumers recorded yet let mut r0 = MemoryConsumer::new(same_name).register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 100 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 100 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( @@ -543,7 +543,7 @@ mod tests { let mut r1 = new_consumer_same_name.clone().register(&pool); // TODO: the insufficient_capacity_err() message is per reservation, not per consumer. // a followup PR will clarify this message "0 bytes already allocated for this reservation" - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 10 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo consumed 10 bytes. 
Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; let res = r1.try_grow(150); assert!( matches!( @@ -555,7 +555,7 @@ mod tests { // Test: will accumulate size changes per consumer, not per reservation r1.grow(20); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo consumed 30 bytes. Error: Failed to allocate additional 150 bytes for foo with 20 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo consumed 30 bytes. Error: Failed to allocate additional 150 bytes for foo with 20 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r1.try_grow(150); assert!( matches!( @@ -570,7 +570,7 @@ mod tests { let consumer_with_same_name_but_different_hash = MemoryConsumer::new(same_name).with_can_spill(true); let mut r2 = consumer_with_same_name_but_different_hash.register(&pool); - let expected = "Resources exhausted with top memory consumers (across reservations) are: foo(can_spill=false) consumed 30 bytes, foo(can_spill=true) consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo(can_spill=false) consumed 30 bytes, foo(can_spill=true) consumed 0 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r2.try_grow(150); assert!( matches!( @@ -590,7 +590,7 @@ mod tests { let r1_consumer = MemoryConsumer::new("r1"); let mut r1 = r1_consumer.clone().register(&pool); r1.grow(20); - let expected = "Resources exhausted with top memory consumers (across reservations) are: r1 consumed 20 bytes, r0 consumed 10 bytes. Error: Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; + let expected = "Additional allocation failed with top memory consumers (across reservations) as: r1 consumed 20 bytes, r0 consumed 10 bytes. 
Error: Failed to allocate additional 150 bytes for r0 with 10 bytes already allocated for this reservation - 70 bytes remain available for the total pool"; let res = r0.try_grow(150); assert!( matches!( @@ -604,7 +604,7 @@ mod tests { // Test: unregister one // only the remaining one should be listed pool.unregister(&r1_consumer); - let expected_consumers = "Resources exhausted with top memory consumers (across reservations) are: r0 consumed 10 bytes"; + let expected_consumers = "Additional allocation failed with top memory consumers (across reservations) as: r0 consumed 10 bytes"; let res = r0.try_grow(150); assert!( matches!( diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 25573d915959b..4202465955589 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -20,16 +20,21 @@ use crate::{ disk_manager::{DiskManager, DiskManagerConfig}, - memory_pool::{GreedyMemoryPool, MemoryPool, UnboundedMemoryPool}, + memory_pool::{ + GreedyMemoryPool, MemoryPool, TrackConsumersPool, UnboundedMemoryPool, + }, object_store::{DefaultObjectStoreRegistry, ObjectStoreRegistry}, }; use crate::cache::cache_manager::{CacheManager, CacheManagerConfig}; use datafusion_common::{DataFusionError, Result}; use object_store::ObjectStore; -use std::fmt::{Debug, Formatter}; use std::path::PathBuf; use std::sync::Arc; +use std::{ + fmt::{Debug, Formatter}, + num::NonZeroUsize, +}; use url::Url; #[derive(Clone)] @@ -213,7 +218,10 @@ impl RuntimeConfig { /// Note DataFusion does not yet respect this limit in all cases. pub fn with_memory_limit(self, max_memory: usize, memory_fraction: f64) -> Self { let pool_size = (max_memory as f64 * memory_fraction) as usize; - self.with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) + self.with_memory_pool(Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(pool_size), + NonZeroUsize::new(5).unwrap(), + ))) } /// Use the specified path to create any needed temporary files diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 2840d3f62bf93..0868ee7216659 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -693,9 +693,8 @@ mod tests { assert_contains!( err.to_string(), - "External error: Resources exhausted: Failed to allocate additional" + "External error: Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: CrossJoinExec" ); - assert_contains!(err.to_string(), "CrossJoinExec"); Ok(()) } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 14835f717ea37..e40a07cf62201 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -3821,13 +3821,11 @@ mod tests { let stream = join.execute(0, task_ctx)?; let err = common::collect(stream).await.unwrap_err(); + // Asserting that operator-level reservation attempting to overallocate assert_contains!( err.to_string(), - "External error: Resources exhausted: Failed to allocate additional" + "External error: Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: HashJoinInput" ); - - // Asserting that operator-level reservation attempting to overallocate - assert_contains!(err.to_string(), "HashJoinInput"); } Ok(()) @@ -3902,13 +3900,12 @@ mod tests { let stream = join.execute(1, task_ctx)?; 
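        // Because `RuntimeConfig::with_memory_limit` now wraps a `GreedyMemoryPool` in a
        // `TrackConsumersPool` (see the `runtime_env.rs` hunk above), the error collected
        // below is expected to name the top memory consumer, e.g. `HashJoinInput[1]`.
        // A minimal sketch of that default construction, using only names from this diff:
        //
        //     let pool_size = (max_memory as f64 * memory_fraction) as usize;
        //     let pool = TrackConsumersPool::new(
        //         GreedyMemoryPool::new(pool_size),
        //         NonZeroUsize::new(5).unwrap(), // report up to five top consumers
        //     );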
let err = common::collect(stream).await.unwrap_err(); + // Asserting that stream-level reservation attempting to overallocate assert_contains!( err.to_string(), - "External error: Resources exhausted: Failed to allocate additional" - ); + "External error: Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: HashJoinInput[1]" - // Asserting that stream-level reservation attempting to overallocate - assert_contains!(err.to_string(), "HashJoinInput[1]"); + ); } Ok(()) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index d69d818331be2..04a025c932882 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -1039,9 +1039,8 @@ mod tests { assert_contains!( err.to_string(), - "External error: Resources exhausted: Failed to allocate additional" + "External error: Resources exhausted: Additional allocation failed with top memory consumers (across reservations) as: NestedLoopJoinLoad[0]" ); - assert_contains!(err.to_string(), "NestedLoopJoinLoad[0]"); } Ok(()) From 06bcf33efd4d2d554326b71825012bf867fe25d6 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 15 Aug 2024 18:33:07 -0400 Subject: [PATCH 305/357] Update REVERSE scalar function to support Utf8View (#11973) --- datafusion/functions/src/unicode/reverse.rs | 106 ++++++++++-------- .../sqllogictest/test_files/functions.slt | 30 +++++ .../sqllogictest/test_files/string_view.slt | 3 +- 3 files changed, 88 insertions(+), 51 deletions(-) diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index 52666cc57059b..da16d3ee37520 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -18,12 +18,14 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; - -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use DataType::{LargeUtf8, Utf8, Utf8View}; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -44,7 +46,7 @@ impl ReverseFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8View, Utf8, LargeUtf8], Volatility::Immutable, ), } @@ -70,8 +72,8 @@ impl ScalarUDFImpl for ReverseFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(reverse::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(reverse::, vec![])(args), + Utf8 | Utf8View => make_scalar_function(reverse::, vec![])(args), + LargeUtf8 => make_scalar_function(reverse::, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function reverse") } @@ -83,10 +85,17 @@ impl ScalarUDFImpl for ReverseFunc { /// reverse('abcde') = 'edcba' /// The implementation uses UTF-8 code points as characters pub fn reverse(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; + if args[0].data_type() == &Utf8View { + reverse_impl::(args[0].as_string_view()) + } else { + reverse_impl::(args[0].as_string::()) + } +} - let result = string_array - .iter() +fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + 
string_array: V, +) -> Result { + let result = ArrayIter::new(string_array) .map(|string| string.map(|string: &str| string.chars().rev().collect::())) .collect::>(); @@ -95,8 +104,8 @@ pub fn reverse(args: &[ArrayRef]) -> Result { #[cfg(test)] mod tests { - use arrow::array::{Array, StringArray}; - use arrow::datatypes::DataType::Utf8; + use arrow::array::{Array, LargeStringArray, StringArray}; + use arrow::datatypes::DataType::{LargeUtf8, Utf8}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -104,50 +113,49 @@ mod tests { use crate::unicode::reverse::ReverseFunc; use crate::utils::test::test_function; + macro_rules! test_reverse { + ($INPUT:expr, $EXPECTED:expr) => { + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))], + $EXPECTED, + &str, + Utf8, + StringArray + ); + + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))], + $EXPECTED, + &str, + Utf8, + StringArray + ); + }; + } + #[test] fn test_functions() -> Result<()> { - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("abcde"))], - Ok(Some("edcba")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("loẅks"))], - Ok(Some("sk̈wol")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("loẅks"))], - Ok(Some("sk̈wol")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(None))], - Ok(None), - &str, - Utf8, - StringArray - ); + test_reverse!(Some("abcde".into()), Ok(Some("edcba"))); + test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol"))); + test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol"))); + test_reverse!(None, Ok(None)); #[cfg(not(feature = "unicode_expressions"))] - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("abcde"))], + test_reverse!( + Some("abcde".into()), internal_err!( "function reverse requires compilation with feature flag: unicode_expressions." 
), - &str, - Utf8, - StringArray ); Ok(()) diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 9034e5baa4c66..e2369c0e9d4ec 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -234,6 +234,16 @@ SELECT reverse('abcde') ---- edcba +query T +SELECT reverse(arrow_cast('abcde', 'LargeUtf8')) +---- +edcba + +query T +SELECT reverse(arrow_cast('abcde', 'Utf8View')) +---- +edcba + query T SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)')) ---- @@ -244,11 +254,31 @@ SELECT reverse('loẅks') ---- sk̈wol +query T +SELECT reverse(arrow_cast('loẅks', 'LargeUtf8')) +---- +sk̈wol + +query T +SELECT reverse(arrow_cast('loẅks', 'Utf8View')) +---- +sk̈wol + query T SELECT reverse(NULL) ---- NULL +query T +SELECT reverse(arrow_cast(NULL, 'LargeUtf8')) +---- +NULL + +query T +SELECT reverse(arrow_cast(NULL, 'Utf8View')) +---- +NULL + query T SELECT right('abcde', -2) ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index b1ac84aba7e66..d056d34533bec 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -890,14 +890,13 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for REVERSE -## TODO file ticket query TT EXPLAIN SELECT REVERSE(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: reverse(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] From 41f6dd92756f7b2897a5bda2f7efb3b236245425 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Fri, 16 Aug 2024 06:35:47 +0800 Subject: [PATCH 306/357] Support partial aggregation skip for boolean functions (#11847) * partial aggr for bool_*() * Use null filter --- .../aggregate/groups_accumulator/bool_op.rs | 19 +++ .../test_files/aggregate_skip_partial.slt | 114 ++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs index be2b5e48a8db9..f4b4c0c932150 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::aggregate::groups_accumulator::nulls::filtered_null_mask; use arrow::array::{ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use arrow::buffer::BooleanBuffer; use datafusion_common::Result; @@ -135,4 +136,22 @@ where // capacity is in bits, so convert to bytes self.values.capacity() / 8 + self.null_state.size() } + + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + let values = values[0].as_boolean().clone(); + + let values_null_buffer_filtered = filtered_null_mask(opt_filter, &values); + let (values_buf, _) = values.into_parts(); + let values_filtered = BooleanArray::new(values_buf, values_null_buffer_filtered); + + Ok(vec![Arc::new(values_filtered)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } } diff --git a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt index 
ba378f4230f89..ab1c7e78f1ffc 100644 --- a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt +++ b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt @@ -40,6 +40,22 @@ STORED AS CSV LOCATION '../../testing/data/csv/aggregate_test_100.csv' OPTIONS ('format.has_header' 'true'); +# Table to test `bool_and()`, `bool_or()` aggregate functions +statement ok +CREATE TABLE aggregate_test_100_bool ( + v1 VARCHAR NOT NULL, + v2 BOOLEAN, + v3 BOOLEAN +); + +statement ok +INSERT INTO aggregate_test_100_bool +SELECT + c1 as v1, + CASE WHEN c2 > 3 THEN TRUE WHEN c2 > 1 THEN FALSE ELSE NULL END as v2, + CASE WHEN c1='a' OR c1='b' THEN TRUE WHEN c1='c' OR c1='d' THEN FALSE ELSE NULL END as v3 +FROM aggregate_test_100; + # Prepare settings to skip partial aggregation from the beginning statement ok set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 0; @@ -117,6 +133,33 @@ GROUP BY 1, 2 ORDER BY 1 LIMIT 5; -2117946883 d -2117946883 NULL NULL NULL -2098805236 c -2098805236 NULL NULL NULL +# FIXME: add bool_and(v3) column when issue fixed +# ISSUE https://github.com/apache/datafusion/issues/11846 +query TBBB rowsort +select v1, bool_or(v2), bool_and(v2), bool_or(v3) +from aggregate_test_100_bool +group by v1 +---- +a true false true +b true false true +c true false false +d true false false +e true false NULL + +query TBBB rowsort +select v1, + bool_or(v2) FILTER (WHERE v1 = 'a' OR v1 = 'c' OR v1 = 'e'), + bool_or(v2) FILTER (WHERE v2 = false), + bool_or(v2) FILTER (WHERE v2 = NULL) +from aggregate_test_100_bool +group by v1 +---- +a true false NULL +b NULL false NULL +c true false NULL +d NULL false NULL +e true false NULL + # Prepare settings to always skip aggregation after couple of batches statement ok set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 10; @@ -223,6 +266,32 @@ c 2.666666666667 0.425241138254 d 2.444444444444 0.541519476308 e 3 0.505440263521 +# FIXME: add bool_and(v3) column when issue fixed +# ISSUE https://github.com/apache/datafusion/issues/11846 +query TBBB rowsort +select v1, bool_or(v2), bool_and(v2), bool_or(v3) +from aggregate_test_100_bool +group by v1 +---- +a true false true +b true false true +c true false false +d true false false +e true false NULL + +query TBBB rowsort +select v1, + bool_or(v2) FILTER (WHERE v1 = 'a' OR v1 = 'c' OR v1 = 'e'), + bool_or(v2) FILTER (WHERE v2 = false), + bool_or(v2) FILTER (WHERE v2 = NULL) +from aggregate_test_100_bool +group by v1 +---- +a true false NULL +b NULL false NULL +c true false NULL +d NULL false NULL +e true false NULL # Enabling PG dialect for filtered aggregates tests statement ok @@ -377,3 +446,48 @@ ORDER BY i; statement ok DROP TABLE decimal_table; + +# Extra tests for 'bool_*()' edge cases +statement ok +set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 0; + +statement ok +set datafusion.execution.skip_partial_aggregation_probe_ratio_threshold = 0.0; + +statement ok +set datafusion.execution.target_partitions = 1; + +statement ok +set datafusion.execution.batch_size = 1; + +statement ok +create table bool_aggregate_functions ( + c1 boolean not null, + c2 boolean not null, + c3 boolean not null, + c4 boolean not null, + c5 boolean, + c6 boolean, + c7 boolean, + c8 boolean +) +as values + (true, true, false, false, true, true, null, null), + (true, false, true, false, false, null, false, null), + (true, true, false, false, null, true, false, null); + +query BBBBBBBB +SELECT bool_and(c1), bool_and(c2), bool_and(c3), 
bool_and(c4), bool_and(c5), bool_and(c6), bool_and(c7), bool_and(c8) FROM bool_aggregate_functions +---- +true false false false false true false NULL + +statement ok +set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 2; + +query BBBBBBBB +SELECT bool_and(c1), bool_and(c2), bool_and(c3), bool_and(c4), bool_and(c5), bool_and(c6), bool_and(c7), bool_and(c8) FROM bool_aggregate_functions +---- +true false false false false true false NULL + +statement ok +DROP TABLE aggregate_test_100_bool From 6b73c4f8660fc722d80dec6918faaccec43be8dd Mon Sep 17 00:00:00 2001 From: WeblWabl Date: Thu, 15 Aug 2024 17:35:57 -0500 Subject: [PATCH 307/357] feat/11953: Support StringView for TRANSLATE() fn (#11967) * feat/11953: Support StringView for TRANSLATE() fn Signed-off-by: Devan * formatting Signed-off-by: Devan * fixes internal error for GenericByteArray cast Signed-off-by: Devan * adds additional TRANSLATE test Signed-off-by: Devan * adds additional TRANSLATE test Signed-off-by: Devan * rm unnecessary generic Signed-off-by: Devan * cleanup + fix typo Signed-off-by: Devan * cleanup + fix typo Signed-off-by: Devan * adds some additional testing to sqllogictests for TRANSLATE string_view Signed-off-by: Devan --------- Signed-off-by: Devan --- datafusion/functions/src/unicode/translate.rs | 70 +++++++++++++------ .../sqllogictest/test_files/string_view.slt | 40 ++++++++++- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 5f64d8875bf50..a42b9c6cb8578 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -18,18 +18,18 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait, +}; use arrow::datatypes::DataType; use hashbrown::HashMap; use unicode_segmentation::UnicodeSegmentation; -use datafusion_common::cast::as_generic_string_array; +use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; -use crate::utils::{make_scalar_function, utf8_to_str_type}; - #[derive(Debug)] pub struct TranslateFunc { signature: Signature, @@ -46,7 +46,10 @@ impl TranslateFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8, Utf8])], + vec![ + Exact(vec![Utf8View, Utf8, Utf8]), + Exact(vec![Utf8, Utf8, Utf8]), + ], Volatility::Immutable, ), } @@ -71,27 +74,54 @@ impl ScalarUDFImpl for TranslateFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(translate::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(translate::, vec![])(args), - other => { - exec_err!("Unsupported data type {other:?} for function translate") - } + make_scalar_function(invoke_translate, vec![])(args) + } +} + +fn invoke_translate(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + let from_array = args[1].as_string::(); + let to_array = args[2].as_string::(); + translate::(string_array, from_array, to_array) + } + DataType::Utf8 => { + let string_array = args[0].as_string::(); + let from_array = args[1].as_string::(); + let to_array = 
args[2].as_string::(); + translate::(string_array, from_array, to_array) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + let from_array = args[1].as_string::(); + let to_array = args[2].as_string::(); + translate::(string_array, from_array, to_array) + } + other => { + exec_err!("Unsupported data type {other:?} for function translate") } } } /// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted. /// translate('12345', '143', 'ax') = 'a2x5' -fn translate(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let from_array = as_generic_string_array::(&args[1])?; - let to_array = as_generic_string_array::(&args[2])?; - - let result = string_array - .iter() - .zip(from_array.iter()) - .zip(to_array.iter()) +fn translate<'a, T: OffsetSizeTrait, V, B>( + string_array: V, + from_array: B, + to_array: B, +) -> Result +where + V: ArrayAccessor, + B: ArrayAccessor, +{ + let string_array_iter = ArrayIter::new(string_array); + let from_array_iter = ArrayIter::new(from_array); + let to_array_iter = ArrayIter::new(to_array); + + let result = string_array_iter + .zip(from_array_iter) + .zip(to_array_iter) .map(|((string, from), to)| match (string, from, to) { (Some(string), Some(from), Some(to)) => { // create a hashmap of [char, index] to change from O(n) to O(1) for from list diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index d056d34533bec..591bccfb936b4 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -425,6 +425,43 @@ logical_plan 01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 02)--TableScan: test projection=[column1_utf8view] +### Test TRANSLATE + +# Should run TRANSLATE using utf8view column successfully +query T +SELECT + TRANSLATE(column1_utf8view, 'foo', 'bar') as c +FROM test; +---- +Andrew +Xiangpeng +Raphael +NULL + +# Should run TRANSLATE using utf8 column successfully +query T +SELECT + TRANSLATE(column1_utf8, 'foo', 'bar') as c +FROM test; +---- +Andrew +Xiangpeng +Raphael +NULL + +# Should run TRANSLATE using large_utf8 column successfully +query T +SELECT + TRANSLATE(column1_large_utf8, 'foo', 'bar') as c +FROM test; +---- +Andrew +Xiangpeng +Raphael +NULL + + + ### Initcap query TT @@ -1047,14 +1084,13 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for TRANSLATE -## TODO file ticket query TT EXPLAIN SELECT TRANSLATE(column1_utf8view, 'foo', 'bar') as c FROM test; ---- logical_plan -01)Projection: translate(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Utf8("bar")) AS c +01)Projection: translate(test.column1_utf8view, Utf8("foo"), Utf8("bar")) AS c 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for FIND_IN_SET From c1fb989aedfa620eed9b2ff249320978a59c4043 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:36:05 -0700 Subject: [PATCH 308/357] Update SPLIT_PART scalar function to support Utf8View (#11975) --- datafusion/functions/src/string/split_part.rs | 128 +++++++++++++----- 
.../sqllogictest/test_files/functions.slt | 32 +++++ .../sqllogictest/test_files/string_view.slt | 5 +- 3 files changed, 128 insertions(+), 37 deletions(-) diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index d6f7bb4a4d4a9..19721f0fad28d 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -21,7 +21,9 @@ use std::sync::Arc; use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; @@ -46,7 +48,12 @@ impl SplitPartFunc { Self { signature: Signature::one_of( vec![ + Exact(vec![Utf8View, Utf8View, Int64]), + Exact(vec![Utf8View, Utf8, Int64]), + Exact(vec![Utf8View, LargeUtf8, Int64]), + Exact(vec![Utf8, Utf8View, Int64]), Exact(vec![Utf8, Utf8, Int64]), + Exact(vec![LargeUtf8, Utf8View, Int64]), Exact(vec![LargeUtf8, Utf8, Int64]), Exact(vec![Utf8, LargeUtf8, Int64]), Exact(vec![LargeUtf8, LargeUtf8, Int64]), @@ -75,50 +82,101 @@ impl ScalarUDFImpl for SplitPartFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(split_part::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(split_part::, vec![])(args), - other => { - exec_err!("Unsupported data type {other:?} for function split_part") + match (args[0].data_type(), args[1].data_type()) { + ( + DataType::Utf8 | DataType::Utf8View, + DataType::Utf8 | DataType::Utf8View, + ) => make_scalar_function(split_part::, vec![])(args), + (DataType::LargeUtf8, DataType::LargeUtf8) => { + make_scalar_function(split_part::, vec![])(args) } + (_, DataType::LargeUtf8) => { + make_scalar_function(split_part::, vec![])(args) + } + (DataType::LargeUtf8, _) => { + make_scalar_function(split_part::, vec![])(args) + } + (first_type, second_type) => exec_err!( + "unsupported first type {} and second type {} for split_part function", + first_type, + second_type + ), } } } +macro_rules! process_split_part { + ($string_array: expr, $delimiter_array: expr, $n_array: expr) => {{ + let result = $string_array + .iter() + .zip($delimiter_array.iter()) + .zip($n_array.iter()) + .map(|((string, delimiter), n)| match (string, delimiter, n) { + (Some(string), Some(delimiter), Some(n)) => { + let split_string: Vec<&str> = string.split(delimiter).collect(); + let len = split_string.len(); + + let index = match n.cmp(&0) { + std::cmp::Ordering::Less => len as i64 + n, + std::cmp::Ordering::Equal => { + return exec_err!("field position must not be zero"); + } + std::cmp::Ordering::Greater => n - 1, + } as usize; + + if index < len { + Ok(Some(split_string[index])) + } else { + Ok(Some("")) + } + } + _ => Ok(None), + }) + .collect::>>()?; + Ok(Arc::new(result) as ArrayRef) + }}; +} + /// Splits string at occurrences of delimiter and returns the n'th field (counting from one). 
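/// The string and delimiter arguments may independently be `Utf8`, `LargeUtf8` or
/// `Utf8View` (any of the combinations listed in the signature above); a negative
/// field position counts from the end of the string.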
/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -fn split_part(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; +fn split_part( + args: &[ArrayRef], +) -> Result { let n_array = as_int64_array(&args[2])?; - let result = string_array - .iter() - .zip(delimiter_array.iter()) - .zip(n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { - (Some(string), Some(delimiter), Some(n)) => { - let split_string: Vec<&str> = string.split(delimiter).collect(); - let len = split_string.len(); - - let index = match n.cmp(&0) { - std::cmp::Ordering::Less => len as i64 + n, - std::cmp::Ordering::Equal => { - return exec_err!("field position must not be zero"); - } - std::cmp::Ordering::Greater => n - 1, - } as usize; - - if index < len { - Ok(Some(split_string[index])) - } else { - Ok(Some("")) + match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8View, _) => { + let string_array = as_string_view_array(&args[0])?; + match args[1].data_type() { + DataType::Utf8View => { + let delimiter_array = as_string_view_array(&args[1])?; + process_split_part!(string_array, delimiter_array, n_array) + } + _ => { + let delimiter_array = + as_generic_string_array::(&args[1])?; + process_split_part!(string_array, delimiter_array, n_array) } } - _ => Ok(None), - }) - .collect::>>()?; - - Ok(Arc::new(result) as ArrayRef) + } + (_, DataType::Utf8View) => { + let delimiter_array = as_string_view_array(&args[1])?; + match args[0].data_type() { + DataType::Utf8View => { + let string_array = as_string_view_array(&args[0])?; + process_split_part!(string_array, delimiter_array, n_array) + } + _ => { + let string_array = as_generic_string_array::(&args[0])?; + process_split_part!(string_array, delimiter_array, n_array) + } + } + } + (_, _) => { + let string_array = as_generic_string_array::(&args[0])?; + let delimiter_array = as_generic_string_array::(&args[1])?; + process_split_part!(string_array, delimiter_array, n_array) + } + } } #[cfg(test)] diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index e2369c0e9d4ec..cb592fdda0c88 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -846,6 +846,38 @@ SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2) ---- bar +# test largeutf8, utf8view for split_part +query T +SELECT split_part(arrow_cast('large_apple_large_orange_large_banana', 'LargeUtf8'), '_', 3) +---- +large + +query T +SELECT split_part(arrow_cast('view_apple_view_orange_view_banana', 'Utf8View'), '_', 3); +---- +view + +query T +SELECT split_part('test_large_split_large_case', arrow_cast('_large', 'LargeUtf8'), 2) +---- +_split + +query T +SELECT split_part(arrow_cast('huge_large_apple_large_orange_large_banana', 'LargeUtf8'), arrow_cast('_', 'Utf8View'), 2) +---- +large + +query T +SELECT split_part(arrow_cast('view_apple_view_large_banana', 'Utf8View'), arrow_cast('_large', 'LargeUtf8'), 2) +---- +_banana + +query T +SELECT split_part(NULL, '_', 2) +---- +NULL + + query B SELECT starts_with('foobar', 'foo') ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 591bccfb936b4..e094bcaf1b5d3 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -1017,11 +1017,12 @@ logical_plan ## TODO 
file ticket query TT EXPLAIN SELECT - SPLIT_PART(column1_utf8view, 'f', 1) as c + SPLIT_PART(column1_utf8view, 'f', 1) as c1, + SPLIT_PART('testtesttest',column1_utf8view, 1) as c2 FROM test; ---- logical_plan -01)Projection: split_part(CAST(test.column1_utf8view AS Utf8), Utf8("f"), Int64(1)) AS c +01)Projection: split_part(test.column1_utf8view, Utf8("f"), Int64(1)) AS c1, split_part(Utf8("testtesttest"), test.column1_utf8view, Int64(1)) AS c2 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for STRPOS From 36158b6e6e39b90cd2689f5c521d2d6964427793 Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Fri, 16 Aug 2024 05:36:49 +0700 Subject: [PATCH 309/357] Handle arguments checking of `min`/`max` function to avoid crashes (#12016) * Handle arguments checking of min/max function to avoid crashes Signed-off-by: Tai Le Manh * Fix code format error --------- Signed-off-by: Tai Le Manh --- datafusion/functions-aggregate/src/min_max.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index f9a08631bfb9d..4dcd5ac0e9515 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -48,7 +48,9 @@ use arrow::datatypes::{ Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use arrow_schema::IntervalUnit; -use datafusion_common::{downcast_value, internal_err, DataFusionError, Result}; +use datafusion_common::{ + downcast_value, exec_err, internal_err, DataFusionError, Result, +}; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use std::fmt::Debug; @@ -68,7 +70,12 @@ use std::ops::Deref; fn get_min_max_result_type(input_types: &[DataType]) -> Result> { // make sure that the input types only has one element. - assert_eq!(input_types.len(), 1); + if input_types.len() != 1 { + return exec_err!( + "min/max was called with {} arguments. It requires only 1.", + input_types.len() + ); + } // min and max support the dictionary data type // unpack the dictionary to get the value match &input_types[0] { From 19ad53d95ca9bf218113e1bb812e7c4b9ef601e6 Mon Sep 17 00:00:00 2001 From: HuSen Date: Fri, 16 Aug 2024 06:44:50 +0800 Subject: [PATCH 310/357] Fix: support NULL input for regular expression comparison operations (#11985) --- .../expr-common/src/type_coercion/binary.rs | 12 ++ .../physical-expr/src/expressions/binary.rs | 105 ++++++++++++++++++ datafusion/sqllogictest/test_files/regexp.slt | 60 ++++++++++ 3 files changed, 177 insertions(+) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 251ac6cb8c0e2..fd97f9af1328a 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1048,11 +1048,23 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { + use arrow::datatypes::DataType::*; + match (lhs_type, rhs_type) { + (DataType::Null, Utf8View | Utf8 | LargeUtf8) => Some(rhs_type.clone()), + (Utf8View | Utf8 | LargeUtf8, DataType::Null) => Some(lhs_type.clone()), + (DataType::Null, DataType::Null) => Some(Utf8), + _ => None, + } +} + /// coercion rules for regular expression comparison operations. 
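/// `NULL` operands are coerced to the string type of the non-null side (or to `Utf8`
/// when both operands are `NULL`), so an expression such as `'abc' ~ NULL` evaluates
/// to `NULL`.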
/// This is a union of string coercion rules and dictionary coercion rules pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { string_coercion(lhs_type, rhs_type) .or_else(|| dictionary_coercion(lhs_type, rhs_type, false)) + .or_else(|| regex_null_coercion(lhs_type, rhs_type)) } /// Checks if the TimeUnit associated with a Time32 or Time64 type is consistent, diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 347a5d82dbecd..06f54481a6faf 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -2498,6 +2498,111 @@ mod tests { Ok(()) } + #[test] + fn regex_with_nulls() -> Result<()> { + let schema = Schema::new(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + ]); + let a = Arc::new(StringArray::from(vec![ + Some("abc"), + None, + Some("abc"), + None, + Some("abc"), + ])) as ArrayRef; + let b = Arc::new(StringArray::from(vec![ + Some("^a"), + Some("^A"), + None, + None, + Some("^(b|c)"), + ])) as ArrayRef; + + let regex_expected = + BooleanArray::from(vec![Some(true), None, None, None, Some(false)]); + let regex_not_expected = + BooleanArray::from(vec![Some(false), None, None, None, Some(true)]); + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexMatch, + regex_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexIMatch, + regex_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexNotMatch, + regex_not_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema), + &a, + &b, + Operator::RegexNotIMatch, + regex_not_expected.clone(), + )?; + + let schema = Schema::new(vec![ + Field::new("a", DataType::LargeUtf8, true), + Field::new("b", DataType::LargeUtf8, true), + ]); + let a = Arc::new(LargeStringArray::from(vec![ + Some("abc"), + None, + Some("abc"), + None, + Some("abc"), + ])) as ArrayRef; + let b = Arc::new(LargeStringArray::from(vec![ + Some("^a"), + Some("^A"), + None, + None, + Some("^(b|c)"), + ])) as ArrayRef; + + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexMatch, + regex_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexIMatch, + regex_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema.clone()), + &a, + &b, + Operator::RegexNotMatch, + regex_not_expected.clone(), + )?; + apply_logic_op( + &Arc::new(schema), + &a, + &b, + Operator::RegexNotIMatch, + regex_not_expected.clone(), + )?; + + Ok(()) + } + #[test] fn or_with_nulls_op() -> Result<()> { let schema = Schema::new(vec![ diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index 149ad7f6fdcd2..22322d79ccfe8 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -230,6 +230,66 @@ SELECT regexp_match('aaa-555', '.*-(\d*)'); ---- [555] +query B +select 'abc' ~ null; +---- +NULL + +query B +select null ~ null; +---- +NULL + +query B +select null ~ 'abc'; +---- +NULL + +query B +select 'abc' ~* null; +---- +NULL + +query B +select null ~* null; +---- +NULL + +query B +select null ~* 'abc'; +---- +NULL + +query B +select 'abc' !~ null; +---- +NULL + +query B +select null !~ null; +---- +NULL + +query B +select null !~ 'abc'; +---- +NULL + +query B +select 'abc' !~* null; +---- +NULL + +query B +select null !~* null; 
+---- +NULL + +query B +select null !~* 'abc'; +---- +NULL + # # regexp_replace tests # From 9d1cf74aa7032fd5cfd8ebd0e3860ccf9ea5f5e8 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 15 Aug 2024 19:05:44 -0400 Subject: [PATCH 311/357] Remove physical sort parameters on aggregate window functions (#12009) * Remove order_by on aggregate window functions since that operation is handled by the window function * Add unit test for window functions using udaf with ordering * Resolve clippy warning --- datafusion/core/src/dataframe/mod.rs | 89 ++++++++++++++++++++- datafusion/physical-plan/src/windows/mod.rs | 1 - 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 25a8d1c87f004..3705873ce3bc9 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1703,13 +1703,16 @@ mod tests { use arrow::array::{self, Int32Array}; use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; + use datafusion_expr::expr::WindowFunction; use datafusion_expr::{ cast, create_udf, expr, lit, BuiltInWindowFunction, ExprFunctionExt, - ScalarFunctionImplementation, Volatility, WindowFunctionDefinition, + ScalarFunctionImplementation, Volatility, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_functions_aggregate::expr_fn::{array_agg, count_distinct}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; + use sqlparser::ast::NullTreatment; // Get string representation of the plan async fn assert_physical_plan(df: &DataFrame, expected: Vec<&str>) { @@ -2355,6 +2358,90 @@ mod tests { Ok(()) } + #[tokio::test] + async fn window_using_aggregates() -> Result<()> { + // build plan using DataFrame API + let df = test_table().await?.filter(col("c1").eq(lit("a")))?; + let mut aggr_expr = vec![ + ( + datafusion_functions_aggregate::first_last::first_value_udaf(), + "first_value", + ), + ( + datafusion_functions_aggregate::first_last::last_value_udaf(), + "last_val", + ), + ( + datafusion_functions_aggregate::approx_distinct::approx_distinct_udaf(), + "approx_distinct", + ), + ( + datafusion_functions_aggregate::approx_median::approx_median_udaf(), + "approx_median", + ), + ( + datafusion_functions_aggregate::median::median_udaf(), + "median", + ), + (datafusion_functions_aggregate::min_max::max_udaf(), "max"), + (datafusion_functions_aggregate::min_max::min_udaf(), "min"), + ] + .into_iter() + .map(|(func, name)| { + let w = WindowFunction::new( + WindowFunctionDefinition::AggregateUDF(func), + vec![col("c3")], + ); + + Expr::WindowFunction(w) + .null_treatment(NullTreatment::IgnoreNulls) + .order_by(vec![col("c2").sort(true, true), col("c3").sort(true, true)]) + .window_frame(WindowFrame::new_bounds( + WindowFrameUnits::Rows, + WindowFrameBound::Preceding(ScalarValue::UInt64(None)), + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(1))), + )) + .build() + .unwrap() + .alias(name) + }) + .collect::>(); + aggr_expr.extend_from_slice(&[col("c2"), col("c3")]); + + let df: Vec = df.select(aggr_expr)?.collect().await?; + + assert_batches_sorted_eq!( + ["+-------------+----------+-----------------+---------------+--------+-----+------+----+------+", + "| first_value | last_val | approx_distinct | approx_median | median | max | min | c2 | c3 |", + 
"+-------------+----------+-----------------+---------------+--------+-----+------+----+------+", + "| | | | | | | | 1 | -85 |", + "| -85 | -101 | 14 | -12 | -101 | 83 | -101 | 4 | -54 |", + "| -85 | -101 | 17 | -25 | -101 | 83 | -101 | 5 | -31 |", + "| -85 | -12 | 10 | -32 | -12 | 83 | -85 | 3 | 13 |", + "| -85 | -25 | 3 | -56 | -25 | -25 | -85 | 1 | -5 |", + "| -85 | -31 | 18 | -29 | -31 | 83 | -101 | 5 | 36 |", + "| -85 | -38 | 16 | -25 | -38 | 83 | -101 | 4 | 65 |", + "| -85 | -43 | 7 | -43 | -43 | 83 | -85 | 2 | 45 |", + "| -85 | -48 | 6 | -35 | -48 | 83 | -85 | 2 | -43 |", + "| -85 | -5 | 4 | -37 | -5 | -5 | -85 | 1 | 83 |", + "| -85 | -54 | 15 | -17 | -54 | 83 | -101 | 4 | -38 |", + "| -85 | -56 | 2 | -70 | -56 | -56 | -85 | 1 | -25 |", + "| -85 | -72 | 9 | -43 | -72 | 83 | -85 | 3 | -12 |", + "| -85 | -85 | 1 | -85 | -85 | -85 | -85 | 1 | -56 |", + "| -85 | 13 | 11 | -17 | 13 | 83 | -85 | 3 | 14 |", + "| -85 | 13 | 11 | -25 | 13 | 83 | -85 | 3 | 13 |", + "| -85 | 14 | 12 | -12 | 14 | 83 | -85 | 3 | 17 |", + "| -85 | 17 | 13 | -11 | 17 | 83 | -85 | 4 | -101 |", + "| -85 | 45 | 8 | -34 | 45 | 83 | -85 | 3 | -72 |", + "| -85 | 65 | 17 | -17 | 65 | 83 | -101 | 5 | -101 |", + "| -85 | 83 | 5 | -25 | 83 | 83 | -85 | 2 | -48 |", + "+-------------+----------+-----------------+---------------+--------+-----+------+----+------+"], + &df + ); + + Ok(()) + } + // Test issue: https://github.com/apache/datafusion/issues/10346 #[tokio::test] async fn test_select_over_aggregate_schema() -> Result<()> { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 1fd0ca36b1eb9..03090faf3efdc 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -113,7 +113,6 @@ pub fn create_window_expr( let aggregate = AggregateExprBuilder::new(Arc::clone(fun), args.to_vec()) .schema(Arc::new(input_schema.clone())) .alias(name) - .order_by(order_by.to_vec()) .with_ignore_nulls(ignore_nulls) .build()?; window_expr_from_aggregate_expr( From 9f77021c2e5b10c4ee7e98f1c0887524ebf87ae2 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 16 Aug 2024 10:55:43 +0800 Subject: [PATCH 312/357] Minor: Use execution error in ScalarValue::iter_to_array for incorrect usage (#11999) * fix error Signed-off-by: jayzhan211 * use exec err Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/common/src/scalar/mod.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index fd0c11ed0ab01..677685b2c65b2 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -36,7 +36,7 @@ use crate::cast::{ as_decimal128_array, as_decimal256_array, as_dictionary_array, as_fixed_size_binary_array, as_fixed_size_list_array, }; -use crate::error::{DataFusionError, Result, _internal_err, _not_impl_err}; +use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; use crate::utils::{ array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array, @@ -1707,9 +1707,7 @@ impl ScalarValue { // figure out the type based on the first element let data_type = match scalars.peek() { None => { - return _internal_err!( - "Empty iterator passed to ScalarValue::iter_to_array" - ); + return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array"); } Some(sv) => 
sv.data_type(), }; @@ -1723,7 +1721,7 @@ impl ScalarValue { if let ScalarValue::$SCALAR_TY(v) = sv { Ok(v) } else { - _internal_err!( + _exec_err!( "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", data_type, sv @@ -1743,7 +1741,7 @@ impl ScalarValue { if let ScalarValue::$SCALAR_TY(v, _) = sv { Ok(v) } else { - _internal_err!( + _exec_err!( "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", data_type, sv @@ -1765,7 +1763,7 @@ impl ScalarValue { if let ScalarValue::$SCALAR_TY(v) = sv { Ok(v) } else { - _internal_err!( + _exec_err!( "Inconsistent types in ScalarValue::iter_to_array. \ Expected {:?}, got {:?}", data_type, sv @@ -1908,11 +1906,11 @@ impl ScalarValue { if &inner_key_type == key_type { Ok(*scalar) } else { - _internal_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})") + _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})") } } _ => { - _internal_err!( + _exec_err!( "Expected scalar of type {value_type} but found: {scalar} {scalar:?}" ) } @@ -1940,7 +1938,7 @@ impl ScalarValue { if let ScalarValue::FixedSizeBinary(_, v) = sv { Ok(v) } else { - _internal_err!( + _exec_err!( "Inconsistent types in ScalarValue::iter_to_array. \ Expected {data_type:?}, got {sv:?}" ) @@ -1965,7 +1963,7 @@ impl ScalarValue { | DataType::RunEndEncoded(_, _) | DataType::ListView(_) | DataType::LargeListView(_) => { - return _internal_err!( + return _not_impl_err!( "Unsupported creation of {:?} array from ScalarValue {:?}", data_type, scalars.peek() From 58075e2329e989ebe3cede1088ea8849f9eb25bd Mon Sep 17 00:00:00 2001 From: HuSen Date: Fri, 16 Aug 2024 16:43:13 +0800 Subject: [PATCH 313/357] Fix: support NULL input for like operations (#12025) --- .../expr-common/src/type_coercion/binary.rs | 1 + datafusion/sqllogictest/test_files/regexp.slt | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index fd97f9af1328a..6d2fb660f6695 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1045,6 +1045,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Date: Fri, 16 Aug 2024 04:43:55 -0400 Subject: [PATCH 314/357] Minor: Add error tests for min/max with 2 arguments (#12024) --- datafusion/sqllogictest/test_files/aggregate.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 322ddcdb047b3..462acaa266ae1 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1881,6 +1881,12 @@ SELECT MIN(c1), MIN(c2) FROM test ---- 0 1 +query error min/max was called with 2 arguments. It requires only 1. +SELECT MIN(c1, c2) FROM test + +query error min/max was called with 2 arguments. It requires only 1. 
+SELECT MAX(c1, c2) FROM test + # aggregate_grouped query II SELECT c1, SUM(c2) FROM test GROUP BY c1 order by c1 From 5db036e90fd76ebeb3ccf7aab747613895c01abe Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Fri, 16 Aug 2024 16:44:41 +0800 Subject: [PATCH 315/357] fix: incorrect aggregation result of `bool_and` (#12017) --- .../aggregate/groups_accumulator/bool_op.rs | 13 ++++-- .../functions-aggregate/src/bool_and_or.rs | 9 ++-- .../sqllogictest/test_files/aggregate.slt | 45 +++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs index f4b4c0c932150..149312e5a9c0f 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs @@ -47,17 +47,22 @@ where /// Function that computes the output bool_fn: F, + + /// The identity element for the boolean operation. + /// Any value combined with this returns the original value. + identity: bool, } impl BooleanGroupsAccumulator where F: Fn(bool, bool) -> bool + Send + Sync, { - pub fn new(bitop_fn: F) -> Self { + pub fn new(bool_fn: F, identity: bool) -> Self { Self { values: BooleanBufferBuilder::new(0), null_state: NullState::new(), - bool_fn: bitop_fn, + bool_fn, + identity, } } } @@ -78,7 +83,9 @@ where if self.values.len() < total_num_groups { let new_groups = total_num_groups - self.values.len(); - self.values.append_n(new_groups, Default::default()); + // Fill with the identity element, so that when the first non-null value is encountered, + // it will combine with the identity and the result will be the first non-null value itself. 
+ self.values.append_n(new_groups, self.identity); } // NullState dispatches / handles tracking nulls and groups that saw no values diff --git a/datafusion/functions-aggregate/src/bool_and_or.rs b/datafusion/functions-aggregate/src/bool_and_or.rs index b993b2a4979c8..7cc7d9ff7fec3 100644 --- a/datafusion/functions-aggregate/src/bool_and_or.rs +++ b/datafusion/functions-aggregate/src/bool_and_or.rs @@ -151,7 +151,7 @@ impl AggregateUDFImpl for BoolAnd { ) -> Result> { match args.return_type { DataType::Boolean => { - Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x && y))) + Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x && y, true))) } _ => not_impl_err!( "GroupsAccumulator not supported for {} with {}", @@ -270,9 +270,10 @@ impl AggregateUDFImpl for BoolOr { args: AccumulatorArgs, ) -> Result> { match args.return_type { - DataType::Boolean => { - Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x || y))) - } + DataType::Boolean => Ok(Box::new(BooleanGroupsAccumulator::new( + |x, y| x || y, + false, + ))), _ => not_impl_err!( "GroupsAccumulator not supported for {} with {}", args.name, diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 462acaa266ae1..0cda24d6ff5e4 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3730,6 +3730,51 @@ SELECT bool_or(distinct c1), bool_or(distinct c2), bool_or(distinct c3), bool_or ---- true true true false true true false NULL +# Test issue: https://github.com/apache/datafusion/issues/11846 +statement ok +create table t1(v1 int, v2 boolean); + +statement ok +insert into t1 values (1, true), (1, true); + +statement ok +insert into t1 values (3, null), (3, true); + +statement ok +insert into t1 values (2, false), (2, true); + +statement ok +insert into t1 values (6, false), (6, false); + +statement ok +insert into t1 values (4, null), (4, null); + +statement ok +insert into t1 values (5, false), (5, null); + +query IB +select v1, bool_and(v2) from t1 group by v1 order by v1; +---- +1 true +2 false +3 true +4 NULL +5 false +6 false + +query IB +select v1, bool_or(v2) from t1 group by v1 order by v1; +---- +1 true +2 true +3 true +4 NULL +5 false +6 false + +statement ok +drop table t1; + # All supported timestamp types # "nanos" --> TimestampNanosecondArray From bb921812fa523717774122c8639b654d04bac705 Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:21:21 +0700 Subject: [PATCH 316/357] Improve performance of REPEAT functions (#12015) * Improve performance of REPEAT functions Signed-off-by: Tai Le Manh * Improve performance of REPEAT functions Signed-off-by: Tai Le Manh * Fix cargo fmt Signed-off-by: Tai Le Manh --------- Signed-off-by: Tai Le Manh --- datafusion/functions/Cargo.toml | 5 + datafusion/functions/benches/repeat.rs | 136 ++++++++++++++++++++++ datafusion/functions/src/string/common.rs | 21 +++- datafusion/functions/src/string/repeat.rs | 84 ++++++------- datafusion/functions/src/unicode/lpad.rs | 19 +-- 5 files changed, 207 insertions(+), 58 deletions(-) create mode 100644 datafusion/functions/benches/repeat.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 688563baecfae..2b3f80fc930bf 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -151,3 +151,8 @@ required-features = ["string_expressions"] harness = false name = "pad" required-features = ["unicode_expressions"] 
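
For a quick end-to-end check of the same code path this new micro-benchmark measures, the rewritten `repeat` kernel can also be exercised through the SQL API. A minimal sketch, assuming a small binary crate with `datafusion` and `tokio` as dependencies (the harness itself is an illustration, not part of the patch):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Mirrors the documented behaviour: repeat('Pg', 4) = 'PgPgPgPg',
    // while a negative count produces an empty string.
    let df = ctx
        .sql("SELECT repeat('Pg', 4) AS repeated, repeat('Pg', -1) AS empty")
        .await?;
    df.show().await?;
    Ok(())
}
```
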
+ +[[bench]] +harness = false +name = "repeat" +required-features = ["string_expressions"] diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs new file mode 100644 index 0000000000000..916c8374e5fb9 --- /dev/null +++ b/datafusion/functions/benches/repeat.rs @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::string; +use std::sync::Arc; +use std::time::Duration; + +fn create_args( + size: usize, + str_len: usize, + repeat_times: i64, + use_string_view: bool, +) -> Vec { + let number_array = Arc::new(Int64Array::from( + (0..size).map(|_| repeat_times).collect::>(), + )); + + if use_string_view { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(number_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&number_array) as ArrayRef), + ] + } +} + +fn criterion_benchmark(c: &mut Criterion) { + let repeat = string::repeat(); + for size in [1024, 4096] { + // REPEAT 3 TIMES + let repeat_times = 3; + let mut group = c.benchmark_group(format!("repeat {} times", repeat_times)); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + group.measurement_time(Duration::from_secs(10)); + + let args = create_args::(size, 32, repeat_times, true); + group.bench_function( + &format!( + "repeat_string_view [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + let args = create_args::(size, 32, repeat_times, false); + group.bench_function( + &format!( + "repeat_string [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + let args = create_args::(size, 32, repeat_times, false); + group.bench_function( + &format!( + "repeat_large_string [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + group.finish(); + + // REPEAT 30 TIMES + let repeat_times = 30; + let mut group = c.benchmark_group(format!("repeat {} times", repeat_times)); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + group.measurement_time(Duration::from_secs(10)); + + let args = create_args::(size, 32, repeat_times, true); + group.bench_function( + 
&format!( + "repeat_string_view [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + let args = create_args::(size, 32, repeat_times, false); + group.bench_function( + &format!( + "repeat_string [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + let args = create_args::(size, 32, repeat_times, false); + group.bench_function( + &format!( + "repeat_large_string [size={}, repeat_times={}]", + size, repeat_times + ), + |b| b.iter(|| black_box(repeat.invoke(&args))), + ); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 7037c1d1c3c3b..54aebb039046b 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -19,8 +19,9 @@ use std::fmt::{Display, Formatter}; use std::sync::Arc; use arrow::array::{ - new_null_array, Array, ArrayDataBuilder, ArrayRef, GenericStringArray, - GenericStringBuilder, OffsetSizeTrait, StringArray, + new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef, + GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray, + StringViewArray, }; use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; @@ -251,6 +252,22 @@ impl<'a> ColumnarValueRef<'a> { } } +pub trait StringArrayType<'a>: ArrayAccessor + Sized { + fn iter(&self) -> ArrayIter; +} + +impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { + fn iter(&self) -> ArrayIter { + GenericStringArray::::iter(self) + } +} + +impl<'a> StringArrayType<'a> for &'a StringViewArray { + fn iter(&self) -> ArrayIter { + StringViewArray::iter(self) + } +} + /// Optimized version of the StringBuilder in Arrow that: /// 1. Precalculating the expected length of the result, avoiding reallocations. /// 2. 
Avoids creating / incrementally creating a `NullBufferBuilder` diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index a377dee06f41b..20e4462784b82 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -18,17 +18,20 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; +use arrow::array::{ + ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, + OffsetSizeTrait, StringViewArray, +}; use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View}; -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, -}; +use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; #[derive(Debug)] @@ -44,7 +47,6 @@ impl Default for RepeatFunc { impl RepeatFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( vec![ @@ -79,51 +81,53 @@ impl ScalarUDFImpl for RepeatFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8View => make_scalar_function(repeat_utf8view, vec![])(args), - DataType::Utf8 => make_scalar_function(repeat::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(repeat::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function repeat. Expected Utf8, Utf8View or LargeUtf8"), - } + make_scalar_function(repeat, vec![])(args) } } /// Repeats string the specified number of times. /// repeat('Pg', 4) = 'PgPgPgPg' -fn repeat(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; +fn repeat(args: &[ArrayRef]) -> Result { let number_array = as_int64_array(&args[1])?; - - let result = string_array - .iter() - .zip(number_array.iter()) - .map(|(string, number)| repeat_common(string, number)) - .collect::>(); - - Ok(Arc::new(result) as ArrayRef) + match args[0].data_type() { + Utf8View => { + let string_view_array = args[0].as_string_view(); + repeat_impl::(string_view_array, number_array) + } + Utf8 => { + let string_array = args[0].as_string::(); + repeat_impl::>(string_array, number_array) + } + LargeUtf8 => { + let string_array = args[0].as_string::(); + repeat_impl::>(string_array, number_array) + } + other => exec_err!( + "Unsupported data type {other:?} for function repeat. \ + Expected Utf8, Utf8View or LargeUtf8." 
+ ), + } } -fn repeat_utf8view(args: &[ArrayRef]) -> Result { - let string_view_array = as_string_view_array(&args[0])?; - let number_array = as_int64_array(&args[1])?; - - let result = string_view_array +fn repeat_impl<'a, T, S>(string_array: S, number_array: &Int64Array) -> Result +where + T: OffsetSizeTrait, + S: StringArrayType<'a>, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); + string_array .iter() .zip(number_array.iter()) - .map(|(string, number)| repeat_common(string, number)) - .collect::(); - - Ok(Arc::new(result) as ArrayRef) -} - -fn repeat_common(string: Option<&str>, number: Option) -> Option { - match (string, number) { - (Some(string), Some(number)) if number >= 0 => { - Some(string.repeat(number as usize)) - } - (Some(_), Some(_)) => Some("".to_string()), - _ => None, - } + .for_each(|(string, number)| match (string, number) { + (Some(string), Some(number)) if number >= 0 => { + builder.append_value(string.repeat(number as usize)) + } + (Some(_), Some(_)) => builder.append_value(""), + _ => builder.append_null(), + }); + let array = builder.finish(); + + Ok(Arc::new(array) as ArrayRef) } #[cfg(test)] diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index 521cdc5d0ff03..e102673c42530 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -20,8 +20,8 @@ use std::fmt::Write; use std::sync::Arc; use arrow::array::{ - Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, - GenericStringBuilder, Int64Array, OffsetSizeTrait, StringViewArray, + Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, + OffsetSizeTrait, StringViewArray, }; use arrow::datatypes::DataType; use unicode_segmentation::UnicodeSegmentation; @@ -32,6 +32,7 @@ use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; #[derive(Debug)] @@ -248,20 +249,6 @@ where Ok(Arc::new(array) as ArrayRef) } -trait StringArrayType<'a>: ArrayAccessor + Sized { - fn iter(&self) -> ArrayIter; -} -impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { - fn iter(&self) -> ArrayIter { - GenericStringArray::::iter(self) - } -} -impl<'a> StringArrayType<'a> for &'a StringViewArray { - fn iter(&self) -> ArrayIter { - StringViewArray::iter(self) - } -} - #[cfg(test)] mod tests { use crate::unicode::lpad::LPadFunc; From 300a08c622588c935bd481e6565f97556c3e629a Mon Sep 17 00:00:00 2001 From: Dmitry Bugakov Date: Fri, 16 Aug 2024 13:13:52 +0200 Subject: [PATCH 317/357] support Utf8View (#12019) --- datafusion/functions/src/unicode/substr.rs | 124 +++++++++++++++--- .../sqllogictest/test_files/string_view.slt | 28 +++- 2 files changed, 129 insertions(+), 23 deletions(-) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 9d15920bb6550..9fd8c75eab236 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -19,10 +19,12 @@ use std::any::Any; use std::cmp::max; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, 
as_int64_array}; +use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -51,6 +53,8 @@ impl SubstrFunc { Exact(vec![LargeUtf8, Int64]), Exact(vec![Utf8, Int64, Int64]), Exact(vec![LargeUtf8, Int64, Int64]), + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8View, Int64, Int64]), ], Volatility::Immutable, ), @@ -77,11 +81,7 @@ impl ScalarUDFImpl for SubstrFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(substr::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(substr::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function substr"), - } + make_scalar_function(substr, vec![])(args) } fn aliases(&self) -> &[String] { @@ -89,18 +89,39 @@ impl ScalarUDFImpl for SubstrFunc { } } +pub fn substr(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + calculate_substr::<_, i32>(string_array, &args[1..]) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + calculate_substr::<_, i64>(string_array, &args[1..]) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + calculate_substr::<_, i32>(string_array, &args[1..]) + } + other => exec_err!("Unsupported data type {other:?} for function substr"), + } +} + /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' /// The implementation uses UTF-8 code points as characters -pub fn substr(args: &[ArrayRef]) -> Result { +fn calculate_substr<'a, V, T>(string_array: V, args: &[ArrayRef]) -> Result +where + V: ArrayAccessor, + T: OffsetSizeTrait, +{ match args.len() { - 2 => { - let string_array = as_generic_string_array::(&args[0])?; - let start_array = as_int64_array(&args[1])?; + 1 => { + let iter = ArrayIter::new(string_array); + let start_array = as_int64_array(&args[0])?; - let result = string_array - .iter() + let result = iter .zip(start_array.iter()) .map(|(string, start)| match (string, start) { (Some(string), Some(start)) => { @@ -113,16 +134,14 @@ pub fn substr(args: &[ArrayRef]) -> Result { _ => None, }) .collect::>(); - Ok(Arc::new(result) as ArrayRef) } - 3 => { - let string_array = as_generic_string_array::(&args[0])?; - let start_array = as_int64_array(&args[1])?; - let count_array = as_int64_array(&args[2])?; + 2 => { + let iter = ArrayIter::new(string_array); + let start_array = as_int64_array(&args[0])?; + let count_array = as_int64_array(&args[1])?; - let result = string_array - .iter() + let result = iter .zip(start_array.iter()) .zip(count_array.iter()) .map(|((string, start), count)| match (string, start, count) { @@ -162,6 +181,71 @@ mod tests { #[test] fn test_functions() -> Result<()> { + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(None)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ], + Ok(None), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from( + "alphabet" + )))), + ColumnarValue::Scalar(ScalarValue::from(0i64)), + ], + Ok(Some("alphabet")), + &str, + Utf8, + StringArray + ); + test_function!( + 
SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from( + "joséésoj" + )))), + ColumnarValue::Scalar(ScalarValue::from(5i64)), + ], + Ok(Some("ésoj")), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from( + "alphabet" + )))), + ColumnarValue::Scalar(ScalarValue::from(3i64)), + ColumnarValue::Scalar(ScalarValue::from(2i64)), + ], + Ok(Some("ph")), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from( + "alphabet" + )))), + ColumnarValue::Scalar(ScalarValue::from(3i64)), + ColumnarValue::Scalar(ScalarValue::from(20i64)), + ], + Ok(Some("phabet")), + &str, + Utf8, + StringArray + ); test_function!( SubstrFunc::new(), &[ diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e094bcaf1b5d3..82a714a432baa 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -521,7 +521,30 @@ logical_plan 01)Projection: test.column1_utf8view LIKE Utf8View("foo") AS like, test.column1_utf8view ILIKE Utf8View("foo") AS ilike 02)--TableScan: test projection=[column1_utf8view] +## Ensure no casts for SUBSTR +query TT +EXPLAIN SELECT + SUBSTR(column1_utf8view, 1, 3) as c1, + SUBSTR(column2_utf8, 1, 3) as c2, + SUBSTR(column2_large_utf8, 1, 3) as c3 +FROM test; +---- +logical_plan +01)Projection: substr(test.column1_utf8view, Int64(1), Int64(3)) AS c1, substr(test.column2_utf8, Int64(1), Int64(3)) AS c2, substr(test.column2_large_utf8, Int64(1), Int64(3)) AS c3 +02)--TableScan: test projection=[column2_utf8, column2_large_utf8, column1_utf8view] + +query TTT +SELECT + SUBSTR(column1_utf8view, 1, 3) as c1, + SUBSTR(column2_utf8, 1, 3) as c2, + SUBSTR(column2_large_utf8, 1, 3) as c3 +FROM test; +---- +And X X +Xia Xia Xia +Rap R R +NULL R R ## Ensure no casts for ASCII @@ -1047,9 +1070,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: substr(__common_expr_1, Int64(1)) AS c, substr(__common_expr_1, Int64(1), Int64(2)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 -03)----TableScan: test projection=[column1_utf8view] +01)Projection: substr(test.column1_utf8view, Int64(1)) AS c, substr(test.column1_utf8view, Int64(1), Int64(2)) AS c2 +02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for SUBSTRINDEX query TT From 08f6e54074ba3207fc68665675c33faaffc7282a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Fri, 16 Aug 2024 23:32:34 +0800 Subject: [PATCH 318/357] Minor: Remove wrong comment on `Accumulator::evaluate` and `Accumulator::state` (#12001) * Remove wrong comment * Remove wrong comment on Accumulator::state * Not call twice comment * Adjust comment order --- datafusion/expr-common/src/accumulator.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/expr-common/src/accumulator.rs b/datafusion/expr-common/src/accumulator.rs index 262646d8ba3ae..eac91c4f8efce 100644 --- a/datafusion/expr-common/src/accumulator.rs +++ b/datafusion/expr-common/src/accumulator.rs @@ -64,8 +64,8 @@ pub trait Accumulator: Send + Sync + Debug { /// For example, the `SUM` accumulator maintains a running sum, /// and `evaluate` will produce that running sum as its output. 
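
The restriction documented here matters because an implementation may move its buffered state out of `self` rather than copy it. A minimal sketch of such an accumulator, where the `BufferedSum` type is hypothetical and invented purely for illustration:

```rust
use arrow::array::ArrayRef;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::Accumulator;

/// Hypothetical accumulator that buffers inputs and sums them in `evaluate`.
#[derive(Debug, Default)]
struct BufferedSum {
    values: Vec<i64>,
}

impl Accumulator for BufferedSum {
    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
        let arr = as_int64_array(&values[0])?;
        self.values.extend(arr.iter().flatten());
        Ok(())
    }

    fn evaluate(&mut self) -> Result<ScalarValue> {
        // Moves the buffered values out to avoid a copy; a second call
        // would therefore see an empty buffer and return a different answer.
        let total: i64 = std::mem::take(&mut self.values).into_iter().sum();
        Ok(ScalarValue::Int64(Some(total)))
    }

    fn state(&mut self) -> Result<Vec<ScalarValue>> {
        // Also consumes the intermediate state, for the same reason.
        Ok(vec![self.evaluate()?])
    }

    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
        self.update_batch(states)
    }

    fn size(&self) -> usize {
        std::mem::size_of_val(self)
            + self.values.capacity() * std::mem::size_of::<i64>()
    }
}
```

Once `evaluate` (or `state`) has taken the buffer, calling it again silently yields a different result, which is exactly the potentially non-deterministic behaviour the updated comment warns about.
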
/// - /// After this call, the accumulator's internal state should be - /// equivalent to when it was first created. + /// This function should not be called twice, otherwise it will + /// result in potentially non-deterministic behavior. /// /// This function gets `&mut self` to allow for the accumulator to build /// arrow compatible internal state that can be returned without copying @@ -85,8 +85,8 @@ pub trait Accumulator: Send + Sync + Debug { /// Returns the intermediate state of the accumulator, consuming the /// intermediate state. /// - /// After this call, the accumulator's internal state should be - /// equivalent to when it was first created. + /// This function should not be called twice, otherwise it will + /// result in potentially non-deterministic behavior. /// /// This function gets `&mut self` to allow for the accumulator to build /// arrow compatible internal state that can be returned without copying From dc84fa56f53534dcb418e01ad3b37cfadcb493e7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 11:38:33 -0400 Subject: [PATCH 319/357] Minor: cleanup `.gitignore` (#12035) --- .gitignore | 46 +--------------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index 05479fd0f07d4..05570eacf630c 100644 --- a/.gitignore +++ b/.gitignore @@ -16,45 +16,11 @@ # under the License. apache-rat-*.jar -arrow-src.tar -arrow-src.tar.gz - -# Compiled source -*.a -*.dll -*.o -*.py[ocd] -*.so -*.so.* -*.bundle -*.dylib -.build_cache_dir -dependency-reduced-pom.xml -MANIFEST -compile_commands.json -build.ninja - -# Generated Visual Studio files -*.vcxproj -*.vcxproj.* -*.sln -*.iml # Linux perf sample data perf.data perf.data.old -cpp/.idea/ -.clangd/ -cpp/.clangd/ -cpp/apidoc/xml/ -docs/example.gz -docs/example1.dat -docs/example3.dat -python/.eggs/ -python/doc/ -# Egg metadata -*.egg-info .vscode .idea/ @@ -66,16 +32,9 @@ docker_cache .*.swp .*.swo -site/ - -# R files -**/.Rproj.user -**/*.Rcheck/ -**/.Rhistory -.Rproj.user +venv/* # macOS -cpp/Brewfile.lock.json .DS_Store # docker volumes used for caching @@ -90,9 +49,6 @@ rusty-tags.vi .history .flatbuffers/ -.vscode -venv/* - # apache release artifacts dev/dist From 2a16704db7af0045d465cda39b90d1a17e68dbe8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 13:35:01 -0400 Subject: [PATCH 320/357] Improve documentation about `ParquetExec` / Parquet predicate pushdown (#11994) * Minor: improve ParquetExec docs * typo * clippy * fix whitespace so rustdoc does not treat as tests * Apply suggestions from code review Co-authored-by: Oleks V * expound upon column rewriting in the context of schema evolution --------- Co-authored-by: Oleks V --- datafusion/common/src/tree_node.rs | 3 + .../datasource/physical_plan/parquet/mod.rs | 60 ++++-- .../physical_plan/parquet/row_filter.rs | 194 ++++++++++++++---- 3 files changed, 192 insertions(+), 65 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index bcf4d7664acc2..88300e3edd0ee 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -486,6 +486,9 @@ pub trait TreeNodeVisitor<'n>: Sized { /// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively /// rewriting [`TreeNode`]s via [`TreeNode::rewrite`]. /// +/// For example you can implement this trait on a struct to rewrite `Expr` or +/// `LogicalPlan` that needs to track state during the rewrite. 
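
A minimal sketch of such a stateful rewrite over logical `Expr`s, where the `LiteralRewriter` type and its behaviour are invented purely for illustration:

```rust
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::Result;
use datafusion_expr::{col, lit, Expr};

/// Hypothetical rewriter: replaces every literal with 42 and counts columns.
#[derive(Default)]
struct LiteralRewriter {
    columns_seen: usize,
}

impl TreeNodeRewriter for LiteralRewriter {
    type Node = Expr;

    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
        if let Expr::Column(_) = &expr {
            self.columns_seen += 1;
        }
        if matches!(&expr, Expr::Literal(_)) {
            Ok(Transformed::yes(lit(42i64)))
        } else {
            Ok(Transformed::no(expr))
        }
    }
}

fn main() -> Result<()> {
    let expr = col("a").eq(lit(1)).and(col("b").eq(lit(2)));
    let mut rewriter = LiteralRewriter::default();
    let rewritten = expr.rewrite(&mut rewriter)?.data;
    println!("saw {} columns: {rewritten}", rewriter.columns_seen);
    Ok(())
}
```

The struct carries `columns_seen` across the traversal, which is the kind of per-rewrite state such a rewriter is free to track.
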
+/// /// See [`TreeNode`] for more details on available APIs /// /// When passed to [`TreeNode::rewrite`], [`TreeNodeRewriter::f_down`] and diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 72aabefba5952..cb026522cfa8d 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -116,13 +116,12 @@ pub use writer::plan_to_parquet; /// /// Supports the following optimizations: /// -/// * Concurrent reads: Can read from one or more files in parallel as multiple +/// * Concurrent reads: reads from one or more files in parallel as multiple /// partitions, including concurrently reading multiple row groups from a single /// file. /// -/// * Predicate push down: skips row groups and pages based on -/// min/max/null_counts in the row group metadata, the page index and bloom -/// filters. +/// * Predicate push down: skips row groups, pages, rows based on metadata +/// and late materialization. See "Predicate Pushdown" below. /// /// * Projection pushdown: reads and decodes only the columns required. /// @@ -132,9 +131,8 @@ pub use writer::plan_to_parquet; /// coalesce I/O operations, etc. See [`ParquetFileReaderFactory`] for more /// details. /// -/// * Schema adapters: read parquet files with different schemas into a unified -/// table schema. This can be used to implement "schema evolution". See -/// [`SchemaAdapterFactory`] for more details. +/// * Schema evolution: read parquet files with different schemas into a unified +/// table schema. See [`SchemaAdapterFactory`] for more details. /// /// * metadata_size_hint: controls the number of bytes read from the end of the /// file in the initial I/O when the default [`ParquetFileReaderFactory`]. If a @@ -144,6 +142,29 @@ pub use writer::plan_to_parquet; /// * User provided [`ParquetAccessPlan`]s to skip row groups and/or pages /// based on external information. See "Implementing External Indexes" below /// +/// # Predicate Pushdown +/// +/// `ParquetExec` uses the provided [`PhysicalExpr`] predicate as a filter to +/// skip reading unnecessary data and improve query performance using several techniques: +/// +/// * Row group pruning: skips entire row groups based on min/max statistics +/// found in [`ParquetMetaData`] and any Bloom filters that are present. +/// +/// * Page pruning: skips individual pages within a ColumnChunk using the +/// [Parquet PageIndex], if present. +/// +/// * Row filtering: skips rows within a page using a form of late +/// materialization. When possible, predicates are applied by the parquet +/// decoder *during* decode (see [`ArrowPredicate`] and [`RowFilter`] for more +/// details). This is only enabled if `ParquetScanOptions::pushdown_filters` is set to true. +/// +/// Note: If the predicate can not be used to accelerate the scan, it is ignored +/// (no error is raised on predicate evaluation errors). +/// +/// [`ArrowPredicate`]: parquet::arrow::arrow_reader::ArrowPredicate +/// [`RowFilter`]: parquet::arrow::arrow_reader::RowFilter +/// [Parquet PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// /// # Implementing External Indexes /// /// It is possible to restrict the row groups and selections within those row @@ -199,10 +220,11 @@ pub use writer::plan_to_parquet; /// applying predicates to metadata. The plan and projections are used to /// determine what pages must be read. 
/// -/// * Step 4: The stream begins reading data, fetching the required pages -/// and incrementally decoding them. +/// * Step 4: The stream begins reading data, fetching the required parquet +/// pages incrementally decoding them, and applying any row filters (see +/// [`Self::with_pushdown_filters`]). /// -/// * Step 5: As each [`RecordBatch]` is read, it may be adapted by a +/// * Step 5: As each [`RecordBatch`] is read, it may be adapted by a /// [`SchemaAdapter`] to match the table schema. By default missing columns are /// filled with nulls, but this can be customized via [`SchemaAdapterFactory`]. /// @@ -268,13 +290,10 @@ impl ParquetExecBuilder { } } - /// Set the predicate for the scan. - /// - /// The ParquetExec uses this predicate to filter row groups and data pages - /// using the Parquet statistics and bloom filters. + /// Set the filter predicate when reading. /// - /// If the predicate can not be used to prune the scan, it is ignored (no - /// error is raised). + /// See the "Predicate Pushdown" section of the [`ParquetExec`] documenation + /// for more details. pub fn with_predicate(mut self, predicate: Arc) -> Self { self.predicate = Some(predicate); self @@ -291,7 +310,7 @@ impl ParquetExecBuilder { self } - /// Set the table parquet options that control how the ParquetExec reads. + /// Set the options for controlling how the ParquetExec reads parquet files. /// /// See also [`Self::new_with_options`] pub fn with_table_parquet_options( @@ -480,11 +499,8 @@ impl ParquetExec { self } - /// If true, any filter [`Expr`]s on the scan will converted to a - /// [`RowFilter`](parquet::arrow::arrow_reader::RowFilter) in the - /// `ParquetRecordBatchStream`. These filters are applied by the - /// parquet decoder to skip unecessairly decoding other columns - /// which would not pass the predicate. Defaults to false + /// If true, the predicate will be used during the parquet scan. + /// Defaults to false /// /// [`Expr`]: datafusion_expr::Expr pub fn with_pushdown_filters(mut self, pushdown_filters: bool) -> Self { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index 9de132169389c..23fdadc2cdeef 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -15,6 +15,50 @@ // specific language governing permissions and limitations // under the License. +//! Utilities to push down of DataFusion filter predicates (any DataFusion +//! `PhysicalExpr` that evaluates to a [`BooleanArray`]) to the parquet decoder +//! level in `arrow-rs`. +//! +//! DataFusion will use a `ParquetRecordBatchStream` to read data from parquet +//! into [`RecordBatch`]es. +//! +//! The `ParquetRecordBatchStream` takes an optional `RowFilter` which is itself +//! a Vec of `Box`. During decoding, the predicates are +//! evaluated in order, to generate a mask which is used to avoid decoding rows +//! in projected columns which do not pass the filter which can significantly +//! reduce the amount of compute required for decoding and thus improve query +//! performance. +//! +//! Since the predicates are applied serially in the order defined in the +//! `RowFilter`, the optimal ordering depends on the exact filters. The best +//! filters to execute first have two properties: +//! +//! 1. They are relatively inexpensive to evaluate (e.g. they read +//! column chunks which are relatively small) +//! +//! 2. 
They filter many (contiguous) rows, reducing the amount of decoding +//! required for subsequent filters and projected columns +//! +//! If requested, this code will reorder the filters based on heuristics try and +//! reduce the evaluation cost. +//! +//! The basic algorithm for constructing the `RowFilter` is as follows +//! +//! 1. Break conjunctions into separate predicates. An expression +//! like `a = 1 AND (b = 2 AND c = 3)` would be +//! separated into the expressions `a = 1`, `b = 2`, and `c = 3`. +//! 2. Determine whether each predicate can be evaluated as an `ArrowPredicate`. +//! 3. Determine, for each predicate, the total compressed size of all +//! columns required to evaluate the predicate. +//! 4. Determine, for each predicate, whether all columns required to +//! evaluate the expression are sorted. +//! 5. Re-order the predicate by total size (from step 3). +//! 6. Partition the predicates according to whether they are sorted (from step 4) +//! 7. "Compile" each predicate `Expr` to a `DatafusionArrowPredicate`. +//! 8. Build the `RowFilter` with the sorted predicates followed by +//! the unsorted predicates. Within each partition, predicates are +//! still be sorted by size. + use std::collections::BTreeSet; use std::sync::Arc; @@ -40,41 +84,24 @@ use crate::physical_plan::metrics; use super::ParquetFileMetrics; -/// This module contains utilities for enabling the pushdown of DataFusion filter predicates (which -/// can be any DataFusion `Expr` that evaluates to a `BooleanArray`) to the parquet decoder level in `arrow-rs`. -/// DataFusion will use a `ParquetRecordBatchStream` to read data from parquet into arrow `RecordBatch`es. -/// When constructing the `ParquetRecordBatchStream` you can provide a `RowFilter` which is itself just a vector -/// of `Box`. During decoding, the predicates are evaluated to generate a mask which is used -/// to avoid decoding rows in projected columns which are not selected which can significantly reduce the amount -/// of compute required for decoding. +/// A "compiled" predicate passed to `ParquetRecordBatchStream` to perform +/// row-level filtering during parquet decoding. /// -/// Since the predicates are applied serially in the order defined in the `RowFilter`, the optimal ordering -/// will depend on the exact filters. The best filters to execute first have two properties: -/// 1. The are relatively inexpensive to evaluate (e.g. they read column chunks which are relatively small) -/// 2. They filter a lot of rows, reducing the amount of decoding required for subsequent filters and projected columns +/// See the module level documentation for more information. /// -/// Given the metadata exposed by parquet, the selectivity of filters is not easy to estimate so the heuristics we use here primarily -/// focus on the evaluation cost. +/// Implements the `ArrowPredicate` trait used by the parquet decoder /// -/// The basic algorithm for constructing the `RowFilter` is as follows -/// 1. Recursively break conjunctions into separate predicates. An expression like `a = 1 AND (b = 2 AND c = 3)` would be -/// separated into the expressions `a = 1`, `b = 2`, and `c = 3`. -/// 2. Determine whether each predicate is suitable as an `ArrowPredicate`. As long as the predicate does not reference any projected columns -/// or columns with non-primitive types, then it is considered suitable. -/// 3. Determine, for each predicate, the total compressed size of all columns required to evaluate the predicate. -/// 4. 
Determine, for each predicate, whether all columns required to evaluate the expression are sorted. -/// 5. Re-order the predicate by total size (from step 3). -/// 6. Partition the predicates according to whether they are sorted (from step 4) -/// 7. "Compile" each predicate `Expr` to a `DatafusionArrowPredicate`. -/// 8. Build the `RowFilter` with the sorted predicates followed by the unsorted predicates. Within each partition -/// the predicates will still be sorted by size. - -/// A predicate which can be passed to `ParquetRecordBatchStream` to perform row-level -/// filtering during parquet decoding. +/// An expression can be evaluated as a `DatafusionArrowPredicate` if it: +/// * Does not reference any projected columns +/// * Does not reference columns with non-primitive types (e.g. structs / lists) #[derive(Debug)] pub(crate) struct DatafusionArrowPredicate { + /// the filter expression physical_expr: Arc, + /// Path to the columns in the parquet schema required to evaluate the + /// expression projection_mask: ProjectionMask, + /// Columns required to evaluate the expression in the arrow schema projection: Vec, /// how many rows were filtered out by this predicate rows_filtered: metrics::Count, @@ -85,6 +112,7 @@ pub(crate) struct DatafusionArrowPredicate { } impl DatafusionArrowPredicate { + /// Create a new `DatafusionArrowPredicate` from a `FilterCandidate` pub fn try_new( candidate: FilterCandidate, schema: &Schema, @@ -152,9 +180,12 @@ impl ArrowPredicate for DatafusionArrowPredicate { } } -/// A candidate expression for creating a `RowFilter` contains the -/// expression as well as data to estimate the cost of evaluating -/// the resulting expression. +/// A candidate expression for creating a `RowFilter`. +/// +/// Each candidate contains the expression as well as data to estimate the cost +/// of evaluating the resulting expression. +/// +/// See the module level documentation for more information. pub(crate) struct FilterCandidate { expr: Arc, required_bytes: usize, @@ -162,19 +193,55 @@ pub(crate) struct FilterCandidate { projection: Vec, } -/// Helper to build a `FilterCandidate`. This will do several things +/// Helper to build a `FilterCandidate`. +/// +/// This will do several things /// 1. Determine the columns required to evaluate the expression /// 2. Calculate data required to estimate the cost of evaluating the filter -/// 3. Rewrite column expressions in the predicate which reference columns not in the particular file schema. -/// This is relevant in the case where we have determined the table schema by merging all individual file schemas -/// and any given file may or may not contain all columns in the merged schema. If a particular column is not present -/// we replace the column expression with a literal expression that produces a null value. +/// 3. Rewrite column expressions in the predicate which reference columns not +/// in the particular file schema. +/// +/// # Schema Rewrite +/// +/// When parquet files are read in the context of "schema evolution" there are +/// potentially wo schemas: +/// +/// 1. The table schema (the columns of the table that the parquet file is part of) +/// 2. The file schema (the columns actually in the parquet file) +/// +/// There are times when the table schema contains columns that are not in the +/// file schema, such as when new columns have been added in new parquet files +/// but old files do not have the columns. 
+/// +/// When a file is missing a column from the table schema, the value of the +/// missing column is filled in with `NULL` via a `SchemaAdapter`. +/// +/// When a predicate is pushed down to the parquet reader, the predicate is +/// evaluated in the context of the file schema. If the predicate references a +/// column that is in the table schema but not in the file schema, the column +/// reference must be rewritten to a literal expression that represents the +/// `NULL` value that would be produced by the `SchemaAdapter`. +/// +/// For example, if: +/// * The table schema is `id, name, address` +/// * The file schema is `id, name` (missing the `address` column) +/// * predicate is `address = 'foo'` +/// +/// When evaluating the predicate as a filter on the parquet file, the predicate +/// must be rewritten to `NULL = 'foo'` as the `address` column will be filled +/// in with `NULL` values during the rest of the evaluation. struct FilterCandidateBuilder<'a> { expr: Arc, + /// The schema of this parquet file file_schema: &'a Schema, + /// The schema of the table (merged schema) -- columns may be in different + /// order than in the file and have columns that are not in the file schema table_schema: &'a Schema, required_column_indices: BTreeSet, + /// Does the expression require any non-primitive columns (like structs)? non_primitive_columns: bool, + /// Does the expression reference any columns that are in the table + /// schema but not in the file schema? projected_columns: bool, } @@ -194,6 +261,13 @@ impl<'a> FilterCandidateBuilder<'a> { } } + /// Attempt to build a `FilterCandidate` from the expression + /// + /// # Return values + /// + /// * `Ok(Some(candidate))` if the expression can be used as an ArrowFilter + /// * `Ok(None)` if the expression cannot be used as an ArrowFilter + /// * `Err(e)` if an error occurs while building the candidate pub fn build( mut self, metadata: &ParquetMetaData, @@ -217,9 +291,13 @@ impl<'a> FilterCandidateBuilder<'a> { } } +/// Implement the `TreeNodeRewriter` trait for `FilterCandidateBuilder` that +/// walks the expression tree and rewrites it in preparation of becoming +/// `FilterCandidate`. impl<'a> TreeNodeRewriter for FilterCandidateBuilder<'a> { type Node = Arc; + /// Called before visiting each child fn f_down( &mut self, node: Arc, @@ -243,13 +321,19 @@ impl<'a> TreeNodeRewriter for FilterCandidateBuilder<'a> { Ok(Transformed::no(node)) } + /// After visiting all children, rewrite column references to nulls if + /// they are not in the file schema fn f_up( &mut self, expr: Arc, ) -> Result>> { + // if the expression is a column, is it in the file schema? if let Some(column) = expr.as_any().downcast_ref::() { if self.file_schema.field_with_name(column.name()).is_err() { - // the column expr must be in the table schema + // Replace the column reference with a NULL (using the type from the table schema) + // e.g. `column = 'foo'` is rewritten be transformed to `NULL = 'foo'` + // + // See comments on `FilterCandidateBuilder` for more information return match self.table_schema.field_with_name(column.name()) { Ok(field) => { // return the null value corresponding to the data type @@ -294,9 +378,11 @@ fn remap_projection(src: &[usize]) -> Vec { projection } -/// Calculate the total compressed size of all `Column's required for -/// predicate `Expr`. This should represent the total amount of file IO -/// required to evaluate the predicate. +/// Calculate the total compressed size of all `Column`'s required for +/// predicate `Expr`. 
+/// +/// This value represents the total amount of IO required to evaluate the +/// predicate. fn size_of_columns( columns: &BTreeSet, metadata: &ParquetMetaData, @@ -312,8 +398,10 @@ fn size_of_columns( Ok(total_size) } -/// For a given set of `Column`s required for predicate `Expr` determine whether all -/// columns are sorted. Sorted columns may be queried more efficiently in the presence of +/// For a given set of `Column`s required for predicate `Expr` determine whether +/// all columns are sorted. +/// +/// Sorted columns may be queried more efficiently in the presence of /// a PageIndex. fn columns_sorted( _columns: &BTreeSet, @@ -323,7 +411,20 @@ fn columns_sorted( Ok(false) } -/// Build a [`RowFilter`] from the given predicate `Expr` +/// Build a [`RowFilter`] from the given predicate `Expr` if possible +/// +/// # returns +/// * `Ok(Some(row_filter))` if the expression can be used as RowFilter +/// * `Ok(None)` if the expression cannot be used as an RowFilter +/// * `Err(e)` if an error occurs while building the filter +/// +/// Note that the returned `RowFilter` may not contains all conjuncts in the +/// original expression. This is because some conjuncts may not be able to be +/// evaluated as an `ArrowPredicate` and will be ignored. +/// +/// For example, if the expression is `a = 1 AND b = 2 AND c = 3` and `b = 2` +/// can not be evaluated for some reason, the returned `RowFilter` will contain +/// `a = 1` and `c = 3`. pub fn build_row_filter( expr: &Arc, file_schema: &Schema, @@ -336,8 +437,11 @@ pub fn build_row_filter( let rows_filtered = &file_metrics.pushdown_rows_filtered; let time = &file_metrics.pushdown_eval_time; + // Split into conjuncts: + // `a = 1 AND b = 2 AND c = 3` -> [`a = 1`, `b = 2`, `c = 3`] let predicates = split_conjunction(expr); + // Determine which conjuncts can be evaluated as ArrowPredicates, if any let mut candidates: Vec = predicates .into_iter() .flat_map(|expr| { @@ -347,9 +451,11 @@ pub fn build_row_filter( }) .collect(); + // no candidates if candidates.is_empty() { Ok(None) } else if reorder_predicates { + // attempt to reorder the predicates by size and whether they are sorted candidates.sort_by_key(|c| c.required_bytes); let (indexed_candidates, other_candidates): (Vec<_>, Vec<_>) = @@ -385,6 +491,8 @@ pub fn build_row_filter( Ok(Some(RowFilter::new(filters))) } else { + // otherwise evaluate the predicates in the order the appeared in the + // original expressions let mut filters: Vec> = vec![]; for candidate in candidates { let filter = DatafusionArrowPredicate::try_new( From b06e8b06b16825cb88050c76cc1536b40ffd466f Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Sat, 17 Aug 2024 10:40:24 +0300 Subject: [PATCH 321/357] catalog.has_header true by default (#11919) --- datafusion/common/src/config.rs | 2 +- .../common/src/file_options/csv_writer.rs | 2 +- .../core/src/datasource/file_format/csv.rs | 23 +++++++++++++++++-- .../tests/user_defined/user_defined_plan.rs | 13 ++++++++--- datafusion/sqllogictest/test_files/copy.slt | 6 ++--- .../sqllogictest/test_files/csv_files.slt | 11 +++++---- datafusion/sqllogictest/test_files/ddl.slt | 7 ++++-- .../sqllogictest/test_files/group_by.slt | 3 ++- .../test_files/information_schema.slt | 4 ++-- datafusion/sqllogictest/test_files/limit.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 6 +++-- .../sqllogictest/test_files/projection.slt | 6 +++-- .../test_files/tpch/create_tables.slt.part | 16 ++++++------- datafusion/sqllogictest/test_files/window.slt | 3 ++- 
docs/source/user-guide/configs.md | 2 +- 15 files changed, 71 insertions(+), 35 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index c48845c061e71..37d26c6f00c4a 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -183,7 +183,7 @@ config_namespace! { /// Default value for `format.has_header` for `CREATE EXTERNAL TABLE` /// if not specified explicitly in the statement. - pub has_header: bool, default = false + pub has_header: bool, default = true /// Specifies whether newlines in (quoted) CSV values are supported. /// diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index ae069079a68f8..943288af91642 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -50,7 +50,7 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions { fn try_from(value: &CsvOptions) -> Result { let mut builder = WriterBuilder::default() - .with_header(value.has_header.unwrap_or(false)) + .with_header(value.has_header.unwrap_or(true)) .with_quote(value.quote) .with_delimiter(value.delimiter); diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index c55f678aef0fb..24d55ea54068a 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -369,7 +369,7 @@ impl FileFormat for CsvFormat { async fn create_writer_physical_plan( &self, input: Arc, - _state: &SessionState, + state: &SessionState, conf: FileSinkConfig, order_requirements: Option>, ) -> Result> { @@ -377,7 +377,26 @@ impl FileFormat for CsvFormat { return not_impl_err!("Overwrites are not implemented yet for CSV"); } - let writer_options = CsvWriterOptions::try_from(&self.options)?; + // `has_header` and `newlines_in_values` fields of CsvOptions may inherit + // their values from session from configuration settings. To support + // this logic, writer options are built from the copy of `self.options` + // with updated values of these special fields. + let has_header = self + .options() + .has_header + .unwrap_or(state.config_options().catalog.has_header); + let newlines_in_values = self + .options() + .newlines_in_values + .unwrap_or(state.config_options().catalog.newlines_in_values); + + let options = self + .options() + .clone() + .with_has_header(has_header) + .with_newlines_in_values(newlines_in_values); + + let writer_options = CsvWriterOptions::try_from(&options)?; let sink_schema = conf.output_schema().clone(); let sink = Arc::new(CsvSink::new(conf, writer_options)); diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 1aa33fc75e5d6..62ba113da0d30 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -113,7 +113,11 @@ async fn exec_sql(ctx: &SessionContext, sql: &str) -> Result { /// Create a test table. 
async fn setup_table(ctx: SessionContext) -> Result { - let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/data/customer.csv'"; + let sql = " + CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) + STORED AS CSV location 'tests/data/customer.csv' + OPTIONS('format.has_header' 'false') + "; let expected = vec!["++", "++"]; @@ -125,8 +129,11 @@ async fn setup_table(ctx: SessionContext) -> Result { } async fn setup_table_without_schemas(ctx: SessionContext) -> Result { - let sql = - "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/data/customer.csv'"; + let sql = " + CREATE EXTERNAL TABLE sales + STORED AS CSV location 'tests/data/customer.csv' + OPTIONS('format.has_header' 'false') + "; let expected = vec!["++", "++"]; diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index ebb3ca2173b83..d2a3a214d71e0 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -417,7 +417,7 @@ COPY source_table to 'test_files/scratch/copy/table_csv' STORED AS CSV OPTIONS # validate folder of csv files statement ok -CREATE EXTERNAL TABLE validate_csv STORED AS csv LOCATION 'test_files/scratch/copy/table_csv' OPTIONS ('format.compression' 'gzip'); +CREATE EXTERNAL TABLE validate_csv STORED AS csv LOCATION 'test_files/scratch/copy/table_csv' OPTIONS ('format.has_header' false, 'format.compression' gzip); query IT select * from validate_csv; @@ -427,7 +427,7 @@ select * from validate_csv; # Copy from table to single csv query I -COPY source_table to 'test_files/scratch/copy/table.csv'; +COPY source_table to 'test_files/scratch/copy/table.csv' OPTIONS ('format.has_header' false); ---- 2 @@ -478,7 +478,7 @@ query I COPY source_table to 'test_files/scratch/copy/table_csv_with_options' STORED AS CSV OPTIONS ( -'format.has_header' false, +'format.has_header' true, 'format.compression' uncompressed, 'format.datetime_format' '%FT%H:%M:%S.%9f', 'format.delimiter' ';', diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 3fb9a6f20c24a..7cb21abdba10e 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -117,14 +117,14 @@ CREATE TABLE src_table_2 ( query I COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv' -STORED AS CSV; +STORED AS CSV OPTIONS ('format.has_header' 'false'); ---- 4 query I COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv' -STORED AS CSV; +STORED AS CSV OPTIONS ('format.has_header' 'false'); ---- 4 @@ -210,7 +210,7 @@ COPY (VALUES ('#second line is a comment'), ('2,3')) TO 'test_files/scratch/csv_files/file_with_comments.csv' -OPTIONS ('format.delimiter' '|'); +OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE stored_table_with_comments ( @@ -219,7 +219,8 @@ CREATE EXTERNAL TABLE stored_table_with_comments ( ) STORED AS CSV LOCATION 'test_files/scratch/csv_files/file_with_comments.csv' OPTIONS ('format.comment' '#', - 'format.delimiter' ','); + 'format.delimiter' ',', + 'format.has_header' 'false'); query TT SELECT * from stored_table_with_comments; @@ -315,7 +316,7 @@ col1 TEXT, col2 TEXT ) STORED AS CSV LOCATION '../core/tests/data/newlines_in_values.csv' -OPTIONS ('format.newlines_in_values' 'true'); +OPTIONS ('format.newlines_in_values' 'true', 'format.has_header' 'false'); query TT select * from 
stored_table_with_newlines_in_values_safe; diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index a35e688479e75..7164425fc0f59 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -470,7 +470,9 @@ statement ok CREATE EXTERNAL TABLE csv_with_timestamps ( name VARCHAR, ts TIMESTAMP -) STORED AS CSV LOCATION '../core/tests/data/timestamps.csv'; +) STORED AS CSV +LOCATION '../core/tests/data/timestamps.csv' +OPTIONS('format.has_header' 'false'); query TP SELECT * from csv_with_timestamps @@ -496,7 +498,8 @@ CREATE EXTERNAL TABLE csv_with_timestamps ( ) STORED AS CSV PARTITIONED BY (c_date) -LOCATION '../core/tests/data/partitioned_table'; +LOCATION '../core/tests/data/partitioned_table' +OPTIONS('format.has_header' 'false'); query TPD SELECT * from csv_with_timestamps where c_date='2018-11-13' diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 5571315e2accd..3d78bd06c30b8 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4264,7 +4264,8 @@ CREATE EXTERNAL TABLE csv_with_timestamps ( ) STORED AS CSV WITH ORDER (ts DESC) -LOCATION '../core/tests/data/timestamps.csv'; +LOCATION '../core/tests/data/timestamps.csv' +OPTIONS('format.has_header' 'false'); # below query should run since it operates on a bounded source and have a sort # at the top of its plan. diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index ff793a72fd8a5..efd017a90bc4d 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -165,7 +165,7 @@ datafusion.catalog.create_default_catalog_and_schema true datafusion.catalog.default_catalog datafusion datafusion.catalog.default_schema public datafusion.catalog.format NULL -datafusion.catalog.has_header false +datafusion.catalog.has_header true datafusion.catalog.information_schema true datafusion.catalog.location NULL datafusion.catalog.newlines_in_values false @@ -255,7 +255,7 @@ datafusion.catalog.create_default_catalog_and_schema true Whether the default ca datafusion.catalog.default_catalog datafusion The default catalog name - this impacts what SQL queries use if not specified datafusion.catalog.default_schema public The default schema name - this impacts what SQL queries use if not specified datafusion.catalog.format NULL Type of `TableProvider` to use when loading `default` schema -datafusion.catalog.has_header false Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. +datafusion.catalog.has_header true Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. datafusion.catalog.information_schema true Should DataFusion provide access to `information_schema` virtual tables for displaying schema information datafusion.catalog.location NULL Location scanned to load tables for `default` schema datafusion.catalog.newlines_in_values false Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. 
Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance. diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 439df7fede511..7341a9d43bac6 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -521,7 +521,7 @@ drop table aggregate_test_100; query I COPY (select * from (values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e') -)) TO 'test_files/scratch/limit/data.csv' STORED AS CSV; +)) TO 'test_files/scratch/limit/data.csv' STORED AS CSV OPTIONS ('format.has_header' 'false'); ---- 5 diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 569602166b389..f0151417e5553 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -98,7 +98,8 @@ NULL three statement ok CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) -STORED AS CSV LOCATION '../core/tests/data/partitioned_csv'; +STORED AS CSV LOCATION '../core/tests/data/partitioned_csv' +OPTIONS('format.has_header' 'false'); # Demonstrate types query TTT @@ -463,7 +464,8 @@ CREATE EXTERNAL TABLE csv_with_timestamps ( ) STORED AS CSV WITH ORDER (ts ASC NULLS LAST) -LOCATION '../core/tests/data/timestamps.csv'; +LOCATION '../core/tests/data/timestamps.csv' +OPTIONS('format.has_header' 'false'); query TT EXPLAIN SELECT DATE_BIN(INTERVAL '15 minutes', ts, TIMESTAMP '2022-08-03 14:40:00Z') as db15 diff --git a/datafusion/sqllogictest/test_files/projection.slt b/datafusion/sqllogictest/test_files/projection.slt index 3c8855e347121..b5bcb5b4c6f77 100644 --- a/datafusion/sqllogictest/test_files/projection.slt +++ b/datafusion/sqllogictest/test_files/projection.slt @@ -64,11 +64,13 @@ CREATE TABLE cpu_load_short(host STRING NOT NULL) AS VALUES statement ok CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) -STORED AS CSV LOCATION '../core/tests/data/partitioned_csv'; +STORED AS CSV LOCATION '../core/tests/data/partitioned_csv' +OPTIONS('format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE test_simple (c1 int, c2 bigint, c3 boolean) -STORED AS CSV LOCATION '../core/tests/data/partitioned_csv/partition-0.csv'; +STORED AS CSV LOCATION '../core/tests/data/partitioned_csv/partition-0.csv' +OPTIONS('format.has_header' 'false'); # projection same fields query I rowsort diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part index 75bcbc198bef8..d6249cb579902 100644 --- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part @@ -31,7 +31,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS supplier ( s_acctbal DECIMAL(15, 2), s_comment VARCHAR, s_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/supplier.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/supplier.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS part ( @@ -45,7 +45,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS part ( p_retailprice DECIMAL(15, 2), p_comment VARCHAR, p_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/part.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/part.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok @@ -56,7 +56,7 @@ CREATE EXTERNAL TABLE 
IF NOT EXISTS partsupp ( ps_supplycost DECIMAL(15, 2), ps_comment VARCHAR, ps_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/partsupp.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/partsupp.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS customer ( @@ -69,7 +69,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS customer ( c_mktsegment VARCHAR, c_comment VARCHAR, c_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/customer.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/customer.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS orders ( @@ -83,7 +83,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_shippriority INTEGER, o_comment VARCHAR, o_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/orders.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/orders.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( @@ -104,7 +104,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipmode VARCHAR, l_comment VARCHAR, l_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/lineitem.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/lineitem.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS nation ( @@ -113,7 +113,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS nation ( n_regionkey BIGINT, n_comment VARCHAR, n_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/nation.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/nation.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS region ( @@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region ( r_name VARCHAR, r_comment VARCHAR, r_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false'); diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index ddf6a7aabffc3..f56ac414a3023 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -49,7 +49,8 @@ OPTIONS ('format.has_header' 'true'); ### execute_with_partition with 4 partitions statement ok CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) -STORED AS CSV LOCATION '../core/tests/data/partitioned_csv'; +STORED AS CSV LOCATION '../core/tests/data/partitioned_csv' +OPTIONS('format.has_header' 'false'); # for window functions without order by the first, last, and nth function call does not make sense diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6f315f539b118..4255307781b6c 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -43,7 +43,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.catalog.information_schema | false | Should DataFusion provide access to `information_schema` virtual tables for displaying schema information | | datafusion.catalog.location | NULL | Location scanned to load tables for `default` schema | | 
datafusion.catalog.format | NULL | Type of `TableProvider` to use when loading `default` schema | -| datafusion.catalog.has_header | false | Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. | +| datafusion.catalog.has_header | true | Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. | | datafusion.catalog.newlines_in_values | false | Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance. | | datafusion.execution.batch_size | 8192 | Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption | | datafusion.execution.coalesce_batches | true | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting | From 72b6a49e7420fe2750bda88fabea90995079de9f Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Sat, 17 Aug 2024 17:46:09 +0800 Subject: [PATCH 322/357] feat: Add map_extract module and function (#11969) * feat: Add map_extract module and function * chore: Fix fmt * chore: Add tests * chore: Simplify * chore: Simplify * chore: Fix clippy * doc: Add user doc * feat: use Signature::user_defined * chore: Update tests * chore: Fix fmt * chore: Fix clippy * chore * chore: typo * chore: Check args len in return_type * doc: Update doc * chore: Simplify logic * chore: check args earlier * feat: Support UTF8VIEW * chore: Update doc * chore: Fic clippy * refacotr: Use MutableArrayData * chore * refactor: Avoid type conversion * chore: Fix clippy * chore: Follow DuckDB * Update datafusion/functions-nested/src/map_extract.rs Co-authored-by: Jay Zhan * chore: Fix fmt --------- Co-authored-by: Jay Zhan --- datafusion/common/src/utils/mod.rs | 17 +- datafusion/functions-nested/src/lib.rs | 3 + .../functions-nested/src/map_extract.rs | 173 ++++++++++++++++++ datafusion/sqllogictest/test_files/map.slt | 81 ++++++++ .../source/user-guide/sql/scalar_functions.md | 29 +++ 5 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 datafusion/functions-nested/src/map_extract.rs diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index bf506c0551eb6..d7059e882e555 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -34,7 +34,7 @@ use arrow_array::{ Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, RecordBatchOptions, }; -use arrow_schema::DataType; +use arrow_schema::{DataType, Fields}; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; @@ -753,6 +753,21 @@ pub fn combine_limit( (combined_skip, combined_fetch) } +pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { + match data_type { + DataType::Map(field, _) => { + let field_data_type = field.data_type(); + match field_data_type { + DataType::Struct(fields) => Ok(fields), + _ 
=> { + _internal_err!("Expected a Struct type, got {:?}", field_data_type) + } + } + } + _ => _internal_err!("Expected a Map type, got {:?}", data_type), + } +} + #[cfg(test)] mod tests { use crate::ScalarValue::Null; diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index ef2c5e709bc16..cc0a7b55cf866 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -42,6 +42,7 @@ pub mod flatten; pub mod length; pub mod make_array; pub mod map; +pub mod map_extract; pub mod planner; pub mod position; pub mod range; @@ -81,6 +82,7 @@ pub mod expr_fn { pub use super::flatten::flatten; pub use super::length::array_length; pub use super::make_array::make_array; + pub use super::map_extract::map_extract; pub use super::position::array_position; pub use super::position::array_positions; pub use super::range::gen_series; @@ -143,6 +145,7 @@ pub fn all_default_nested_functions() -> Vec> { replace::array_replace_all_udf(), replace::array_replace_udf(), map::map_udf(), + map_extract::map_extract_udf(), ] } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs new file mode 100644 index 0000000000000..82f0d8d6c15e4 --- /dev/null +++ b/datafusion/functions-nested/src/map_extract.rs @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for map_extract functions. 
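+//!
+//! A minimal sketch of the intended behaviour, mirroring the `map.slt` cases
+//! and the `scalar_functions.md` entry added in this patch: the function takes
+//! a map and a key, returning a single-element list with the matching value,
+//! or an empty list when the key is absent.
+//!
+//! ```sql
+//! -- returns [1]
+//! SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a');
+//! -- key not present in the map: returns an empty list []
+//! SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd');
+//! ```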
+ +use arrow::array::{ArrayRef, Capacities, MutableArrayData}; +use arrow_array::{make_array, ListArray}; + +use arrow::datatypes::DataType; +use arrow_array::{Array, MapArray}; +use arrow_buffer::OffsetBuffer; +use arrow_schema::Field; +use datafusion_common::utils::get_map_entry_field; + +use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; +use std::vec; + +use crate::utils::make_scalar_function; + +// Create static instances of ScalarUDFs for each function +make_udf_expr_and_func!( + MapExtract, + map_extract, + map key, + "Return a list containing the value for a given key or an empty list if the key is not contained in the map.", + map_extract_udf +); + +#[derive(Debug)] +pub(super) struct MapExtract { + signature: Signature, + aliases: Vec, +} + +impl MapExtract { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + aliases: vec![String::from("element_at")], + } + } +} + +impl ScalarUDFImpl for MapExtract { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "map_extract" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + if arg_types.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } + let map_type = &arg_types[0]; + let map_fields = get_map_entry_field(map_type)?; + Ok(DataType::List(Arc::new(Field::new( + "item", + map_fields.last().unwrap().data_type().clone(), + true, + )))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(map_extract_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } + + let field = get_map_entry_field(&arg_types[0])?; + Ok(vec![ + arg_types[0].clone(), + field.first().unwrap().data_type().clone(), + ]) + } +} + +fn general_map_extract_inner( + map_array: &MapArray, + query_keys_array: &dyn Array, +) -> Result { + let keys = map_array.keys(); + let mut offsets = vec![0_i32]; + + let values = map_array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], true, capacity); + + for (row_index, offset_window) in map_array.value_offsets().windows(2).enumerate() { + let start = offset_window[0] as usize; + let end = offset_window[1] as usize; + let len = end - start; + + let query_key = query_keys_array.slice(row_index, 1); + + let value_index = + (0..len).find(|&i| keys.slice(start + i, 1).as_ref() == query_key.as_ref()); + + match value_index { + Some(index) => { + mutable.extend(0, start + index, start + index + 1); + } + None => { + mutable.extend_nulls(1); + } + } + offsets.push(offsets[row_index] + 1); + } + + let data = mutable.freeze(); + + Ok(Arc::new(ListArray::new( + Arc::new(Field::new("item", map_array.value_type().clone(), true)), + OffsetBuffer::::new(offsets.into()), + Arc::new(make_array(data)), + None, + ))) +} + +fn map_extract_inner(args: &[ArrayRef]) -> Result { + if args.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } + + let map_array = match args[0].data_type() { + DataType::Map(_, _) => as_map_array(&args[0])?, + _ => return exec_err!("The first argument in map_extract must be a map"), + 
}; + + let key_type = map_array.key_type(); + + if key_type != args[1].data_type() { + return exec_err!( + "The key type {} does not match the map key type {}", + args[1].data_type(), + key_type + ); + } + + general_map_extract_inner(map_array, &args[1]) +} diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 0dc37c68bca4d..b7a0a74913b06 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -15,6 +15,22 @@ # specific language governing permissions and limitations # under the License. +statement ok +CREATE TABLE map_array_table_1 +AS VALUES + (MAP {1: [1, NULL, 3], 2: [4, NULL, 6], 3: [7, 8, 9]}, 1, 1.0, '1'), + (MAP {4: [1, NULL, 3], 5: [4, NULL, 6], 6: [7, 8, 9]}, 5, 5.0, '5'), + (MAP {7: [1, NULL, 3], 8: [9, NULL, 6], 9: [7, 8, 9]}, 4, 4.0, '4') +; + +statement ok +CREATE TABLE map_array_table_2 +AS VALUES + (MAP {'1': [1, NULL, 3], '2': [4, NULL, 6], '3': [7, 8, 9]}, 1, 1.0, '1'), + (MAP {'4': [1, NULL, 3], '5': [4, NULL, 6], '6': [7, 8, 9]}, 5, 5.0, '5'), + (MAP {'7': [1, NULL, 3], '8': [9, NULL, 6], '9': [7, 8, 9]}, 4, 4.0, '4') +; + statement ok CREATE EXTERNAL TABLE data STORED AS PARQUET @@ -493,3 +509,68 @@ select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b cardinality(MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }); ---- 3 2 0 2 + +# map_extract +# key is string +query ???? +select map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'b'), + map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd'); +---- +[1] [] [3] [] + +# key is integer +query ???? +select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, 3:3}, 2), + map_extract(MAP {1: 1, 2: NULL, 3:3}, 3), map_extract(MAP {1: 1, 2: NULL, 3:3}, 4); +---- +[1] [] [3] [] + +# value is list +query ???? +select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 2), + map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 3), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 4); +---- +[[1, 2]] [] [[3]] [] + +# key in map and query key are different types +query ????? +select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0), + map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0), + map_extract(MAP {arrow_cast('1', 'Utf8View'): 1, arrow_cast('2', 'Utf8View'): 2, arrow_cast('3', 'Utf8View'):3}, '1'); +---- +[1] [1] [1] [] [1] + +# map_extract with columns +query ??? +select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_1; +---- +[[1, , 3]] [] [] +[] [[4, , 6]] [] +[] [] [[1, , 3]] + +query ??? +select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_1; +---- +[[1, , 3]] [[1, , 3]] [[1, , 3]] +[[4, , 6]] [[4, , 6]] [[4, , 6]] +[] [] [] + +query ??? +select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_2; +---- +[[1, , 3]] [] [[1, , 3]] +[[4, , 6]] [] [[4, , 6]] +[] [] [] + +query ??? 
+select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_2; +---- +[[1, , 3]] [] [] +[] [[4, , 6]] [] +[] [] [[1, , 3]] + +statement ok +drop table map_array_table_1; + +statement ok +drop table map_array_table_2; \ No newline at end of file diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c7490df04983e..c7b3409ba7cd2 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3640,6 +3640,7 @@ Unwraps struct fields into columns. - [map](#map) - [make_map](#make_map) +- [map_extract](#map_extract) ### `map` @@ -3700,6 +3701,34 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); {POST: 41, HEAD: 33, PATCH: } ``` +### `map_extract` + +Return a list containing the value for a given key or an empty list if the key is not contained in the map. + +``` +map_extract(map, key) +``` + +#### Arguments + +- `map`: Map expression. + Can be a constant, column, or function, and any combination of map operators. +- `key`: Key to extract from the map. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. + +#### Example + +``` +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] +``` + +#### Aliases + +- element_at + ## Hashing Functions - [digest](#digest) From e4bc62247c63e857432710f2f4d50ba1de5dc960 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Sat, 17 Aug 2024 17:48:28 +0800 Subject: [PATCH 323/357] refactor: Move `LimitedDistinctAggregation` to `physical-optimizer` crate (#12036) * refactor: Move LimitedDistinctAggregation to physical-optimizer crate * chore: Update cargo.lock * chore: Fix clippy * Update datafusion/physical-optimizer/src/limited_distinct_aggregation.rs Co-authored-by: Andrew Lamb * chore: Clean import --------- Co-authored-by: Andrew Lamb --- datafusion-cli/Cargo.lock | 89 +-- .../limited_distinct_aggregation.rs | 611 ------------------ datafusion/core/src/physical_optimizer/mod.rs | 1 - .../limited_distinct_aggregation.rs | 440 +++++++++++++ .../core/tests/physical_optimizer/mod.rs | 2 + .../tests/physical_optimizer/test_util.rs | 57 ++ datafusion/physical-optimizer/Cargo.toml | 1 + datafusion/physical-optimizer/src/lib.rs | 1 + .../src/limited_distinct_aggregation.rs | 192 ++++++ 9 files changed, 742 insertions(+), 652 deletions(-) delete mode 100644 datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs create mode 100644 datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs create mode 100644 datafusion/core/tests/physical_optimizer/test_util.rs create mode 100644 datafusion/physical-optimizer/src/limited_distinct_aggregation.rs diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 90995c1d116ae..22f889de804bb 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -272,7 +272,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.3.0", + "indexmap 2.4.0", "lexical-core", "num", "serde", @@ -875,12 +875,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.10" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -1163,7 +1164,7 @@ dependencies = [ 
"glob", "half", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "num-traits", @@ -1386,7 +1387,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "paste", @@ -1415,7 +1416,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "paste", @@ -1457,6 +1458,7 @@ dependencies = [ "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.12.1", ] [[package]] @@ -1483,7 +1485,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "once_cell", @@ -1848,7 +1850,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -1867,7 +1869,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -2159,9 +2161,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -2226,9 +2228,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -2305,9 +2307,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "libflate" @@ -2729,7 +2731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.3.0", + "indexmap 2.4.0", ] [[package]] @@ -3388,18 +3390,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -3408,9 +3410,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = 
"83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", "memchr", @@ -3441,6 +3443,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -3847,15 +3855,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -4057,19 +4065,20 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", @@ -4082,9 +4091,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if", "js-sys", @@ -4094,9 +4103,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4104,9 +4113,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", @@ -4117,9 +4126,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-streams" @@ -4136,9 +4145,9 @@ dependencies = [ [[package]] name = 
"web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs deleted file mode 100644 index b181ad9051edd..0000000000000 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ /dev/null @@ -1,611 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! A special-case optimizer rule that pushes limit into a grouped aggregation -//! which has no aggregate expressions or sorting requirements - -use std::sync::Arc; - -use crate::physical_plan::aggregates::AggregateExec; -use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; - -use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; - -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use itertools::Itertools; - -/// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all -/// rows in the group to be processed for correctness. 
Example queries fitting this description are: -/// `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` -/// `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` -pub struct LimitedDistinctAggregation {} - -impl LimitedDistinctAggregation { - /// Create a new `LimitedDistinctAggregation` - pub fn new() -> Self { - Self {} - } - - fn transform_agg( - aggr: &AggregateExec, - limit: usize, - ) -> Option> { - // rules for transforming this Aggregate are held in this method - if !aggr.is_unordered_unfiltered_group_by_distinct() { - return None; - } - - // We found what we want: clone, copy the limit down, and return modified node - let new_aggr = AggregateExec::try_new( - *aggr.mode(), - aggr.group_expr().clone(), - aggr.aggr_expr().to_vec(), - aggr.filter_expr().to_vec(), - aggr.input().clone(), - aggr.input_schema(), - ) - .expect("Unable to copy Aggregate!") - .with_limit(Some(limit)); - Some(Arc::new(new_aggr)) - } - - /// transform_limit matches an `AggregateExec` as the child of a `LocalLimitExec` - /// or `GlobalLimitExec` and pushes the limit into the aggregation as a soft limit when - /// there is a group by, but no sorting, no aggregate expressions, and no filters in the - /// aggregation - fn transform_limit(plan: Arc) -> Option> { - let limit: usize; - let mut global_fetch: Option = None; - let mut global_skip: usize = 0; - let children: Vec>; - let mut is_global_limit = false; - if let Some(local_limit) = plan.as_any().downcast_ref::() { - limit = local_limit.fetch(); - children = local_limit.children().into_iter().cloned().collect(); - } else if let Some(global_limit) = plan.as_any().downcast_ref::() - { - global_fetch = global_limit.fetch(); - global_fetch?; - global_skip = global_limit.skip(); - // the aggregate must read at least fetch+skip number of rows - limit = global_fetch.unwrap() + global_skip; - children = global_limit.children().into_iter().cloned().collect(); - is_global_limit = true - } else { - return None; - } - let child = children.iter().exactly_one().ok()?; - // ensure there is no output ordering; can this rule be relaxed? 
- if plan.output_ordering().is_some() { - return None; - } - // ensure no ordering is required on the input - if plan.required_input_ordering()[0].is_some() { - return None; - } - - // if found_match_aggr is true, match_aggr holds a parent aggregation whose group_by - // must match that of a child aggregation in order to rewrite the child aggregation - let mut match_aggr: Arc = plan; - let mut found_match_aggr = false; - - let mut rewrite_applicable = true; - let closure = |plan: Arc| { - if !rewrite_applicable { - return Ok(Transformed::no(plan)); - } - if let Some(aggr) = plan.as_any().downcast_ref::() { - if found_match_aggr { - if let Some(parent_aggr) = - match_aggr.as_any().downcast_ref::() - { - if !parent_aggr.group_expr().eq(aggr.group_expr()) { - // a partial and final aggregation with different groupings disqualifies - // rewriting the child aggregation - rewrite_applicable = false; - return Ok(Transformed::no(plan)); - } - } - } - // either we run into an Aggregate and transform it, or disable the rewrite - // for subsequent children - match Self::transform_agg(aggr, limit) { - None => {} - Some(new_aggr) => { - match_aggr = plan; - found_match_aggr = true; - return Ok(Transformed::yes(new_aggr)); - } - } - } - rewrite_applicable = false; - Ok(Transformed::no(plan)) - }; - let child = child.clone().transform_down(closure).data().ok()?; - if is_global_limit { - return Some(Arc::new(GlobalLimitExec::new( - child, - global_skip, - global_fetch, - ))); - } - Some(Arc::new(LocalLimitExec::new(child, limit))) - } -} - -impl Default for LimitedDistinctAggregation { - fn default() -> Self { - Self::new() - } -} - -impl PhysicalOptimizerRule for LimitedDistinctAggregation { - fn optimize( - &self, - plan: Arc, - config: &ConfigOptions, - ) -> Result> { - if config.optimizer.enable_distinct_aggregation_soft_limit { - plan.transform_down(|plan| { - Ok( - if let Some(plan) = - LimitedDistinctAggregation::transform_limit(plan.clone()) - { - Transformed::yes(plan) - } else { - Transformed::no(plan) - }, - ) - }) - .data() - } else { - Ok(plan) - } - } - - fn name(&self) -> &str { - "LimitedDistinctAggregation" - } - - fn schema_check(&self) -> bool { - true - } -} - -#[cfg(test)] -mod tests { - - use super::*; - use crate::physical_optimizer::enforce_distribution::tests::{ - parquet_exec_with_sort, schema, trim_plan_display, - }; - use crate::physical_plan::aggregates::PhysicalGroupBy; - use crate::physical_plan::collect; - use crate::physical_plan::memory::MemoryExec; - use crate::prelude::SessionContext; - use crate::test_util::TestAggregate; - - use arrow::array::Int32Array; - use arrow::compute::SortOptions; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; - use arrow::util::pretty::pretty_format_batches; - use arrow_schema::SchemaRef; - use datafusion_execution::config::SessionConfig; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::{cast, col}; - use datafusion_physical_expr::{expressions, PhysicalExpr, PhysicalSortExpr}; - use datafusion_physical_plan::aggregates::AggregateMode; - use datafusion_physical_plan::displayable; - - fn mock_data() -> Result> { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - ])); - - let batch = RecordBatch::try_new( - Arc::clone(&schema), - vec![ - Arc::new(Int32Array::from(vec![ - Some(1), - Some(2), - None, - Some(1), - Some(4), - Some(5), - ])), - Arc::new(Int32Array::from(vec![ - Some(1), - None, - 
Some(6), - Some(2), - Some(8), - Some(9), - ])), - ], - )?; - - Ok(Arc::new(MemoryExec::try_new( - &[vec![batch]], - Arc::clone(&schema), - None, - )?)) - } - - fn assert_plan_matches_expected( - plan: &Arc, - expected: &[&str], - ) -> Result<()> { - let expected_lines: Vec<&str> = expected.to_vec(); - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - - let optimized = LimitedDistinctAggregation::new() - .optimize(Arc::clone(plan), state.config_options())?; - - let optimized_result = displayable(optimized.as_ref()).indent(true).to_string(); - let actual_lines = trim_plan_display(&optimized_result); - - assert_eq!( - &expected_lines, &actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - - Ok(()) - } - - async fn assert_results_match_expected( - plan: Arc, - expected: &str, - ) -> Result<()> { - let cfg = SessionConfig::new().with_target_partitions(1); - let ctx = SessionContext::new_with_config(cfg); - let batches = collect(plan, ctx.task_ctx()).await?; - let actual = format!("{}", pretty_format_batches(&batches)?); - assert_eq!(actual, expected); - Ok(()) - } - - pub fn build_group_by( - input_schema: &SchemaRef, - columns: Vec, - ) -> PhysicalGroupBy { - let mut group_by_expr: Vec<(Arc, String)> = vec![]; - for column in columns.iter() { - group_by_expr.push((col(column, input_schema).unwrap(), column.to_string())); - } - PhysicalGroupBy::new_single(group_by_expr.clone()) - } - - #[tokio::test] - async fn test_partial_final() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Partial/Final AggregateExec - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - Arc::new(partial_agg), /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(final_agg), - 4, // fetch - ); - // expected to push the limit to the Partial and Final AggregateExecs - let expected = [ - "LocalLimitExec: fetch=4", - "AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]", - "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - let expected = r#" -+---+ -| a | -+---+ -| 1 | -| 2 | -| | -| 4 | -+---+ -"# - .trim(); - assert_results_match_expected(plan, expected).await?; - Ok(()) - } - - #[tokio::test] - async fn test_single_local() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 4, // fetch - ); - // expected to push the limit to the AggregateExec - let expected = [ - "LocalLimitExec: fetch=4", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", - "MemoryExec: 
partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - let expected = r#" -+---+ -| a | -+---+ -| 1 | -| 2 | -| | -| 4 | -+---+ -"# - .trim(); - assert_results_match_expected(plan, expected).await?; - Ok(()) - } - - #[tokio::test] - async fn test_single_global() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = GlobalLimitExec::new( - Arc::new(single_agg), - 1, // skip - Some(3), // fetch - ); - // expected to push the skip+fetch limit to the AggregateExec - let expected = [ - "GlobalLimitExec: skip=1, fetch=3", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - let expected = r#" -+---+ -| a | -+---+ -| 2 | -| | -| 4 | -+---+ -"# - .trim(); - assert_results_match_expected(plan, expected).await?; - Ok(()) - } - - #[tokio::test] - async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT distinct a FROM MemoryExec GROUP BY a, b LIMIT 4;`, Single/Single AggregateExec - let group_by_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string(), "b".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let distinct_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - Arc::new(group_by_agg), /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(distinct_agg), - 4, // fetch - ); - // expected to push the limit to the outer AggregateExec only - let expected = [ - "LocalLimitExec: fetch=4", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", - "AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - let expected = r#" -+---+ -| a | -+---+ -| 1 | -| 2 | -| | -| 4 | -+---+ -"# - .trim(); - assert_results_match_expected(plan, expected).await?; - Ok(()) - } - - #[test] - fn test_no_group_by() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec![]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 10, // fetch - ); - // expected not to push the limit to the AggregateExec - let expected = [ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[], aggr=[]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } - - 
#[test] - fn test_has_aggregate_expression() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![agg.count_expr(&schema)], /* aggr_expr */ - vec![None], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 10, // fetch - ); - // expected not to push the limit to the AggregateExec - let expected = [ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } - - #[test] - fn test_has_filter() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - - // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec - // the `a > 1` filter is applied in the AggregateExec - let filter_expr = Some(expressions::binary( - expressions::col("a", &schema)?, - Operator::Gt, - cast(expressions::lit(1u32), &schema, DataType::Int32)?, - &schema, - )?); - let agg = TestAggregate::new_count_star(); - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![agg.count_expr(&schema)], /* aggr_expr */ - vec![filter_expr], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 10, // fetch - ); - // expected not to push the limit to the AggregateExec - // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out - let expected = [ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } - - #[test] - fn test_has_order_by() -> Result<()> { - let sort_key = vec![PhysicalSortExpr { - expr: expressions::col("a", &schema()).unwrap(), - options: SortOptions::default(), - }]; - let source = parquet_exec_with_sort(vec![sort_key]); - let schema = source.schema(); - - // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec - // the `a > 1` filter is applied in the AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 10, // fetch - ); - // expected not to push the limit to the AggregateExec - let expected = [ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted", - "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } -} diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 0e68a05d855c7..c32c77043f150 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ 
b/datafusion/core/src/physical_optimizer/mod.rs @@ -26,7 +26,6 @@ pub mod combine_partial_final_agg; pub mod enforce_distribution; pub mod enforce_sorting; pub mod join_selection; -pub mod limited_distinct_aggregation; pub mod optimizer; pub mod projection_pushdown; pub mod pruning; diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs new file mode 100644 index 0000000000000..48389b0304f62 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -0,0 +1,440 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for the limited distinct aggregation optimizer rule + +use super::test_util::{parquet_exec_with_sort, schema, trim_plan_display}; + +use std::sync::Arc; + +use arrow::{ + array::Int32Array, + compute::SortOptions, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + util::pretty::pretty_format_batches, +}; +use arrow_schema::SchemaRef; +use datafusion::{prelude::SessionContext, test_util::TestAggregate}; +use datafusion_common::Result; +use datafusion_execution::config::SessionConfig; +use datafusion_expr::Operator; +use datafusion_physical_expr::{ + expressions::{cast, col}, + PhysicalExpr, PhysicalSortExpr, +}; +use datafusion_physical_optimizer::{ + limited_distinct_aggregation::LimitedDistinctAggregation, PhysicalOptimizerRule, +}; +use datafusion_physical_plan::{ + aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, + collect, displayable, expressions, + limit::{GlobalLimitExec, LocalLimitExec}, + memory::MemoryExec, + ExecutionPlan, +}; + +fn mock_data() -> Result> { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(1), + Some(4), + Some(5), + ])), + Arc::new(Int32Array::from(vec![ + Some(1), + None, + Some(6), + Some(2), + Some(8), + Some(9), + ])), + ], + )?; + + Ok(Arc::new(MemoryExec::try_new( + &[vec![batch]], + Arc::clone(&schema), + None, + )?)) +} + +fn assert_plan_matches_expected( + plan: &Arc, + expected: &[&str], +) -> Result<()> { + let expected_lines: Vec<&str> = expected.to_vec(); + let session_ctx = SessionContext::new(); + let state = session_ctx.state(); + + let optimized = LimitedDistinctAggregation::new() + .optimize(Arc::clone(plan), state.config_options())?; + + let optimized_result = displayable(optimized.as_ref()).indent(true).to_string(); + let actual_lines = trim_plan_display(&optimized_result); + + assert_eq!( + &expected_lines, &actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + 
expected_lines, actual_lines + ); + + Ok(()) +} + +async fn assert_results_match_expected( + plan: Arc, + expected: &str, +) -> Result<()> { + let cfg = SessionConfig::new().with_target_partitions(1); + let ctx = SessionContext::new_with_config(cfg); + let batches = collect(plan, ctx.task_ctx()).await?; + let actual = format!("{}", pretty_format_batches(&batches)?); + assert_eq!(actual, expected); + Ok(()) +} + +pub fn build_group_by(input_schema: &SchemaRef, columns: Vec) -> PhysicalGroupBy { + let mut group_by_expr: Vec<(Arc, String)> = vec![]; + for column in columns.iter() { + group_by_expr.push((col(column, input_schema).unwrap(), column.to_string())); + } + PhysicalGroupBy::new_single(group_by_expr.clone()) +} + +#[tokio::test] +async fn test_partial_final() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Partial/Final AggregateExec + let partial_agg = AggregateExec::try_new( + AggregateMode::Partial, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let final_agg = AggregateExec::try_new( + AggregateMode::Final, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + Arc::new(partial_agg), /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(final_agg), + 4, // fetch + ); + // expected to push the limit to the Partial and Final AggregateExecs + let expected = [ + "LocalLimitExec: fetch=4", + "AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + let expected = r#" ++---+ +| a | ++---+ +| 1 | +| 2 | +| | +| 4 | ++---+ +"# + .trim(); + assert_results_match_expected(plan, expected).await?; + Ok(()) +} + +#[tokio::test] +async fn test_single_local() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(single_agg), + 4, // fetch + ); + // expected to push the limit to the AggregateExec + let expected = [ + "LocalLimitExec: fetch=4", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + let expected = r#" ++---+ +| a | ++---+ +| 1 | +| 2 | +| | +| 4 | ++---+ +"# + .trim(); + assert_results_match_expected(plan, expected).await?; + Ok(()) +} + +#[tokio::test] +async fn test_single_global() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = 
GlobalLimitExec::new( + Arc::new(single_agg), + 1, // skip + Some(3), // fetch + ); + // expected to push the skip+fetch limit to the AggregateExec + let expected = [ + "GlobalLimitExec: skip=1, fetch=3", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + let expected = r#" ++---+ +| a | ++---+ +| 2 | +| | +| 4 | ++---+ +"# + .trim(); + assert_results_match_expected(plan, expected).await?; + Ok(()) +} + +#[tokio::test] +async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT distinct a FROM MemoryExec GROUP BY a, b LIMIT 4;`, Single/Single AggregateExec + let group_by_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string(), "b".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let distinct_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + Arc::new(group_by_agg), /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(distinct_agg), + 4, // fetch + ); + // expected to push the limit to the outer AggregateExec only + let expected = [ + "LocalLimitExec: fetch=4", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]", + "AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + let expected = r#" ++---+ +| a | ++---+ +| 1 | +| 2 | +| | +| 4 | ++---+ +"# + .trim(); + assert_results_match_expected(plan, expected).await?; + Ok(()) +} + +#[test] +fn test_no_group_by() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec![]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(single_agg), + 10, // fetch + ); + // expected not to push the limit to the AggregateExec + let expected = [ + "LocalLimitExec: fetch=10", + "AggregateExec: mode=Single, gby=[], aggr=[]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + Ok(()) +} + +#[test] +fn test_has_aggregate_expression() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + let agg = TestAggregate::new_count_star(); + + // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![agg.count_expr(&schema)], /* aggr_expr */ + vec![None], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(single_agg), + 10, // fetch + ); + // expected not to push the limit to the AggregateExec + let expected = [ + "LocalLimitExec: fetch=10", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]", + 
"MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + Ok(()) +} + +#[test] +fn test_has_filter() -> Result<()> { + let source = mock_data()?; + let schema = source.schema(); + + // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec + // the `a > 1` filter is applied in the AggregateExec + let filter_expr = Some(expressions::binary( + expressions::col("a", &schema)?, + Operator::Gt, + cast(expressions::lit(1u32), &schema, DataType::Int32)?, + &schema, + )?); + let agg = TestAggregate::new_count_star(); + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![agg.count_expr(&schema)], /* aggr_expr */ + vec![filter_expr], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(single_agg), + 10, // fetch + ); + // expected not to push the limit to the AggregateExec + // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out + let expected = [ + "LocalLimitExec: fetch=10", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]", + "MemoryExec: partitions=1, partition_sizes=[1]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + Ok(()) +} + +#[test] +fn test_has_order_by() -> Result<()> { + let sort_key = vec![PhysicalSortExpr { + expr: expressions::col("a", &schema()).unwrap(), + options: SortOptions::default(), + }]; + let source = parquet_exec_with_sort(vec![sort_key]); + let schema = source.schema(); + + // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec + // the `a > 1` filter is applied in the AggregateExec + let single_agg = AggregateExec::try_new( + AggregateMode::Single, + build_group_by(&schema.clone(), vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema.clone(), /* input_schema */ + )?; + let limit_exec = LocalLimitExec::new( + Arc::new(single_agg), + 10, // fetch + ); + // expected not to push the limit to the AggregateExec + let expected = [ + "LocalLimitExec: fetch=10", + "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + let plan: Arc = Arc::new(limit_exec); + assert_plan_matches_expected(&plan, &expected)?; + Ok(()) +} diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 904a8b9fbb380..149103cf34823 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -17,3 +17,5 @@ mod aggregate_statistics; mod limit_pushdown; +mod limited_distinct_aggregation; +mod test_util; diff --git a/datafusion/core/tests/physical_optimizer/test_util.rs b/datafusion/core/tests/physical_optimizer/test_util.rs new file mode 100644 index 0000000000000..131b887c4ec72 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/test_util.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Test utilities for physical optimizer tests + +use std::sync::Arc; + +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use datafusion::datasource::{ + listing::PartitionedFile, + physical_plan::{FileScanConfig, ParquetExec}, +}; +use datafusion_execution::object_store::ObjectStoreUrl; +use datafusion_physical_expr::PhysicalSortExpr; + +/// create a single parquet file that is sorted +pub(crate) fn parquet_exec_with_sort( + output_ordering: Vec>, +) -> Arc { + ParquetExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering), + ) + .build_arc() +} + +pub(crate) fn schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int64, true), + Field::new("b", DataType::Int64, true), + Field::new("c", DataType::Int64, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Boolean, true), + ])) +} + +pub(crate) fn trim_plan_display(plan: &str) -> Vec<&str> { + plan.split('\n') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect() +} diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 125ea6acc77fd..a7f675b37a606 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -36,3 +36,4 @@ datafusion-common = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } +itertools = { workspace = true } diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index d54e6dbcab8fc..caebdcc927ae9 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -19,6 +19,7 @@ pub mod aggregate_statistics; pub mod limit_pushdown; +pub mod limited_distinct_aggregation; mod optimizer; pub mod output_requirements; diff --git a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs new file mode 100644 index 0000000000000..e18e530072dbb --- /dev/null +++ b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs @@ -0,0 +1,192 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A special-case optimizer rule that pushes limit into a grouped aggregation +//! which has no aggregate expressions or sorting requirements + +use std::sync::Arc; + +use datafusion_physical_plan::aggregates::AggregateExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; + +use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::Result; + +use crate::PhysicalOptimizerRule; +use itertools::Itertools; + +/// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all +/// rows in the group to be processed for correctness. Example queries fitting this description are: +/// `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` +/// `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` +pub struct LimitedDistinctAggregation {} + +impl LimitedDistinctAggregation { + /// Create a new `LimitedDistinctAggregation` + pub fn new() -> Self { + Self {} + } + + fn transform_agg( + aggr: &AggregateExec, + limit: usize, + ) -> Option> { + // rules for transforming this Aggregate are held in this method + if !aggr.is_unordered_unfiltered_group_by_distinct() { + return None; + } + + // We found what we want: clone, copy the limit down, and return modified node + let new_aggr = AggregateExec::try_new( + *aggr.mode(), + aggr.group_expr().clone(), + aggr.aggr_expr().to_vec(), + aggr.filter_expr().to_vec(), + aggr.input().to_owned(), + aggr.input_schema(), + ) + .expect("Unable to copy Aggregate!") + .with_limit(Some(limit)); + Some(Arc::new(new_aggr)) + } + + /// transform_limit matches an `AggregateExec` as the child of a `LocalLimitExec` + /// or `GlobalLimitExec` and pushes the limit into the aggregation as a soft limit when + /// there is a group by, but no sorting, no aggregate expressions, and no filters in the + /// aggregation + fn transform_limit(plan: Arc) -> Option> { + let limit: usize; + let mut global_fetch: Option = None; + let mut global_skip: usize = 0; + let children: Vec>; + let mut is_global_limit = false; + if let Some(local_limit) = plan.as_any().downcast_ref::() { + limit = local_limit.fetch(); + children = local_limit.children().into_iter().cloned().collect(); + } else if let Some(global_limit) = plan.as_any().downcast_ref::() + { + global_fetch = global_limit.fetch(); + global_fetch?; + global_skip = global_limit.skip(); + // the aggregate must read at least fetch+skip number of rows + limit = global_fetch.unwrap() + global_skip; + children = global_limit.children().into_iter().cloned().collect(); + is_global_limit = true + } else { + return None; + } + let child = children.iter().exactly_one().ok()?; + // ensure there is no output ordering; can this rule be relaxed? 
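+    // An output ordering (or a required input ordering, checked just below) means the query expects a specific set of rows, e.g. a top-k under some sort order, so stopping the aggregation after it has produced an arbitrary `limit` groups could change the result; in that case the soft limit must not be pushed down.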
+ if plan.output_ordering().is_some() { + return None; + } + // ensure no ordering is required on the input + if plan.required_input_ordering()[0].is_some() { + return None; + } + + // if found_match_aggr is true, match_aggr holds a parent aggregation whose group_by + // must match that of a child aggregation in order to rewrite the child aggregation + let mut match_aggr: Arc = plan; + let mut found_match_aggr = false; + + let mut rewrite_applicable = true; + let closure = |plan: Arc| { + if !rewrite_applicable { + return Ok(Transformed::no(plan)); + } + if let Some(aggr) = plan.as_any().downcast_ref::() { + if found_match_aggr { + if let Some(parent_aggr) = + match_aggr.as_any().downcast_ref::() + { + if !parent_aggr.group_expr().eq(aggr.group_expr()) { + // a partial and final aggregation with different groupings disqualifies + // rewriting the child aggregation + rewrite_applicable = false; + return Ok(Transformed::no(plan)); + } + } + } + // either we run into an Aggregate and transform it, or disable the rewrite + // for subsequent children + match Self::transform_agg(aggr, limit) { + None => {} + Some(new_aggr) => { + match_aggr = plan; + found_match_aggr = true; + return Ok(Transformed::yes(new_aggr)); + } + } + } + rewrite_applicable = false; + Ok(Transformed::no(plan)) + }; + let child = child.to_owned().transform_down(closure).data().ok()?; + if is_global_limit { + return Some(Arc::new(GlobalLimitExec::new( + child, + global_skip, + global_fetch, + ))); + } + Some(Arc::new(LocalLimitExec::new(child, limit))) + } +} + +impl Default for LimitedDistinctAggregation { + fn default() -> Self { + Self::new() + } +} + +impl PhysicalOptimizerRule for LimitedDistinctAggregation { + fn optimize( + &self, + plan: Arc, + config: &ConfigOptions, + ) -> Result> { + if config.optimizer.enable_distinct_aggregation_soft_limit { + plan.transform_down(|plan| { + Ok( + if let Some(plan) = + LimitedDistinctAggregation::transform_limit(plan.to_owned()) + { + Transformed::yes(plan) + } else { + Transformed::no(plan) + }, + ) + }) + .data() + } else { + Ok(plan) + } + } + + fn name(&self) -> &str { + "LimitedDistinctAggregation" + } + + fn schema_check(&self) -> bool { + true + } +} + +// See tests in datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs From bd2d4eeaf39bc4fbc4694aa332c695019847e25b Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 17 Aug 2024 15:19:25 +0530 Subject: [PATCH 324/357] Convert built-in `row_number` to user-defined window function (#12030) * Adds new crate for window functions * Moves `row_number` to window functions crate * Fixes build errors * Regenerates protobuf * Makes `row_number` no-op temporarily * Minor: fixes formatting * Implements `WindowUDF` for `row_number` * Minor: fixes formatting * Adds singleton instance of UDWF: `row_number` * Adds partition evaluator * Registers default window functions * Implements `evaluate_all` * Fixes: allow non-uppercase globals * Minor: prefix underscore for unused variable * Minor: fixes formatting * Uses `row_number_udwf` * Fixes: unparser test for `row_number` * Uses row number to represent functional dependency * Minor: fixes formatting * Removes `row_number` from case-insensitive name test * Deletes wrapper for `row_number` window expression * Fixes: lowercase name in error statement * Fixes: `row_number` fields are not nullable * Fixes: lowercase name in explain output * Updates Cargo.lock * Fixes: lowercase name in explain output * Adds support for result ordering * Minor: add newline between methods * 
Fixes: re-export crate name in doc comments * Adds doc comment for `WindowUDFImpl::nullable` * Minor: renames variable * Minor: update doc comments * Deletes code * Minor: update doc comments * Minor: adds period * Adds doc comment for `row_number` window UDF * Adds fluent API for creating `row_number` expression * Minor: removes unnecessary path prefix * Adds roundtrip logical plan test case * Updates unit tests for `row_number` * Deletes code * Minor: copy edit doc comments * Minor: deletes comment * Minor: copy edits udwf doc comments --- Cargo.toml | 2 + datafusion-cli/Cargo.lock | 11 ++ datafusion/core/Cargo.toml | 1 + .../core/src/execution/session_state.rs | 1 + .../src/execution/session_state_defaults.rs | 9 +- datafusion/core/src/lib.rs | 5 + .../core/tests/fuzz_cases/window_fuzz.rs | 13 +- .../expr/src/built_in_window_function.rs | 10 +- datafusion/expr/src/expr.rs | 1 - datafusion/expr/src/logical_plan/plan.rs | 14 +- datafusion/expr/src/udwf.rs | 34 ++++ datafusion/expr/src/window_function.rs | 8 - datafusion/functions-window/Cargo.toml | 47 +++++ datafusion/functions-window/README.md | 26 +++ datafusion/functions-window/src/lib.rs | 58 ++++++ datafusion/functions-window/src/row_number.rs | 183 ++++++++++++++++++ .../physical-expr/src/expressions/mod.rs | 1 - datafusion/physical-expr/src/window/mod.rs | 1 - .../physical-expr/src/window/row_number.rs | 166 ---------------- .../physical-expr/src/window/window_expr.rs | 6 - datafusion/physical-plan/src/windows/mod.rs | 21 +- datafusion/proto/proto/datafusion.proto | 3 +- datafusion/proto/src/generated/pbjson.rs | 6 +- datafusion/proto/src/generated/prost.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 2 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - .../proto/src/physical_plan/to_proto.rs | 7 +- .../tests/cases/roundtrip_logical_plan.rs | 2 + datafusion/sql/Cargo.toml | 1 + datafusion/sql/src/unparser/expr.rs | 7 +- .../sqllogictest/test_files/functions.slt | 2 +- .../sqllogictest/test_files/group_by.slt | 8 +- datafusion/sqllogictest/test_files/joins.slt | 56 +++--- datafusion/sqllogictest/test_files/window.slt | 124 ++++++------ 34 files changed, 519 insertions(+), 326 deletions(-) create mode 100644 datafusion/functions-window/Cargo.toml create mode 100644 datafusion/functions-window/README.md create mode 100644 datafusion/functions-window/src/lib.rs create mode 100644 datafusion/functions-window/src/row_number.rs delete mode 100644 datafusion/physical-expr/src/window/row_number.rs diff --git a/Cargo.toml b/Cargo.toml index 02b1f1ccd92a9..dfb70dd1eb7bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "datafusion/functions-aggregate", "datafusion/functions-aggregate-common", "datafusion/functions-nested", + "datafusion/functions-window", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-expr-common", @@ -102,6 +103,7 @@ datafusion-functions = { path = "datafusion/functions", version = "41.0.0" } datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "41.0.0" } datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "41.0.0" } datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "41.0.0" } datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", 
default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 22f889de804bb..cda57ba9b443a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1152,6 +1152,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1376,6 +1377,16 @@ dependencies = [ "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "41.0.0" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" version = "41.0.0" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index e678c93ede8be..adbba3eb31d6c 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -106,6 +106,7 @@ datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-nested = { workspace = true, optional = true } +datafusion-functions-window = { workspace = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index e9c876291845a..88a90e1e1d09f 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -1044,6 +1044,7 @@ impl SessionStateBuilder { self.scalar_functions = Some(SessionStateDefaults::default_scalar_functions()); self.aggregate_functions = Some(SessionStateDefaults::default_aggregate_functions()); + self.window_functions = Some(SessionStateDefaults::default_window_functions()); self } diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index 07420afe842f7..bc7e194caeaee 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -29,12 +29,12 @@ use crate::datasource::provider::DefaultTableFactory; use crate::execution::context::SessionState; #[cfg(feature = "nested_expressions")] use crate::functions_nested; -use crate::{functions, functions_aggregate}; +use crate::{functions, functions_aggregate, functions_window}; use datafusion_execution::config::SessionConfig; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::planner::ExprPlanner; -use datafusion_expr::{AggregateUDF, ScalarUDF}; +use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; use std::collections::HashMap; use std::sync::Arc; use url::Url; @@ -112,6 +112,11 @@ impl SessionStateDefaults { functions_aggregate::all_default_aggregate_functions() } + /// returns the list of default [`WindowUDF']'s + pub fn default_window_functions() -> Vec> { + functions_window::all_default_window_functions() + } + /// returns the list of default [`FileFormatFactory']'s pub fn default_file_formats() -> Vec> { let file_formats: Vec> = vec![ diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 6b3773e4f6d56..daeb21db9d05c 100644 --- a/datafusion/core/src/lib.rs +++ 
b/datafusion/core/src/lib.rs @@ -603,6 +603,11 @@ pub mod functions_aggregate { pub use datafusion_functions_aggregate::*; } +/// re-export of [`datafusion_functions_window`] crate +pub mod functions_window { + pub use datafusion_functions_window::*; +} + #[cfg(test)] pub mod test; pub mod test_util; diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index d75d8e43370d1..a6c2cf700cc4e 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -44,6 +44,7 @@ use datafusion_physical_expr::expressions::{cast, col, lit}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use test_utils::add_empty_batches; +use datafusion::functions_window::row_number::row_number_udwf; use hashbrown::HashMap; use rand::distributions::Alphanumeric; use rand::rngs::StdRng; @@ -180,12 +181,10 @@ async fn bounded_window_causal_non_causal() -> Result<()> { // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING // ) ( - // Window function - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::RowNumber, - ), + // user-defined window function + WindowFunctionDefinition::WindowUDF(row_number_udwf()), // its name - "ROW_NUMBER", + "row_number", // no argument vec![], // Expected causality, for None cases causality will be determined from window frame boundaries @@ -377,9 +376,7 @@ fn get_random_function( window_fn_map.insert( "row_number", ( - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::RowNumber, - ), + WindowFunctionDefinition::WindowUDF(row_number_udwf()), vec![], ), ); diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index 3885d70049f35..597e4e68a0c69 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -40,8 +40,6 @@ impl fmt::Display for BuiltInWindowFunction { /// [window function]: https://en.wikipedia.org/wiki/Window_function_(SQL) #[derive(Debug, Clone, PartialEq, Eq, Hash, EnumIter)] pub enum BuiltInWindowFunction { - /// number of the current row within its partition, counting from 1 - RowNumber, /// rank of the current row with gaps; same as row_number of its first peer Rank, /// rank of the current row without gaps; this function counts peer groups @@ -74,7 +72,6 @@ impl BuiltInWindowFunction { pub fn name(&self) -> &str { use BuiltInWindowFunction::*; match self { - RowNumber => "ROW_NUMBER", Rank => "RANK", DenseRank => "DENSE_RANK", PercentRank => "PERCENT_RANK", @@ -93,7 +90,6 @@ impl FromStr for BuiltInWindowFunction { type Err = DataFusionError; fn from_str(name: &str) -> Result { Ok(match name.to_uppercase().as_str() { - "ROW_NUMBER" => BuiltInWindowFunction::RowNumber, "RANK" => BuiltInWindowFunction::Rank, "DENSE_RANK" => BuiltInWindowFunction::DenseRank, "PERCENT_RANK" => BuiltInWindowFunction::PercentRank, @@ -131,8 +127,7 @@ impl BuiltInWindowFunction { })?; match self { - BuiltInWindowFunction::RowNumber - | BuiltInWindowFunction::Rank + BuiltInWindowFunction::Rank | BuiltInWindowFunction::DenseRank | BuiltInWindowFunction::Ntile => Ok(DataType::UInt64), BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => { @@ -150,8 +145,7 @@ impl BuiltInWindowFunction { pub fn signature(&self) -> Signature { // note: the physical expression must accept the type returned by this function or the execution panics. 
match self { - BuiltInWindowFunction::RowNumber - | BuiltInWindowFunction::Rank + BuiltInWindowFunction::Rank | BuiltInWindowFunction::DenseRank | BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => Signature::any(0, Volatility::Immutable), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index b4d489cc7c1e5..88939ccf41b8c 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2896,7 +2896,6 @@ mod test { #[test] fn test_window_function_case_insensitive() -> Result<()> { let names = vec![ - "row_number", "rank", "dense_rank", "percent_rank", diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 2bab6d516a73e..f9b30351677d0 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -36,9 +36,9 @@ use crate::utils::{ split_conjunction, }; use crate::{ - build_join_schema, expr_vec_fmt, BinaryExpr, BuiltInWindowFunction, - CreateMemoryTable, CreateView, Expr, ExprSchemable, LogicalPlanBuilder, Operator, - TableProviderFilterPushDown, TableSource, WindowFunctionDefinition, + build_join_schema, expr_vec_fmt, BinaryExpr, CreateMemoryTable, CreateView, Expr, + ExprSchemable, LogicalPlanBuilder, Operator, TableProviderFilterPushDown, + TableSource, WindowFunctionDefinition, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -2214,18 +2214,14 @@ impl Window { .enumerate() .filter_map(|(idx, expr)| { if let Expr::WindowFunction(WindowFunction { - // Function is ROW_NUMBER - fun: - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::RowNumber, - ), + fun: WindowFunctionDefinition::WindowUDF(udwf), partition_by, .. }) = expr { // When there is no PARTITION BY, row number will be unique // across the entire table. - if partition_by.is_empty() { + if udwf.name() == "row_number" && partition_by.is_empty() { return Some(idx + input_len); } } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 9e6d963ccf7f4..aa754a57086f6 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -17,6 +17,7 @@ //! [`WindowUDF`]: User Defined Window Functions +use arrow::compute::SortOptions; use std::hash::{DefaultHasher, Hash, Hasher}; use std::{ any::Any, @@ -176,6 +177,21 @@ impl WindowUDF { pub fn partition_evaluator_factory(&self) -> Result> { self.inner.partition_evaluator() } + + /// Returns if column values are nullable for this window function. + /// + /// See [`WindowUDFImpl::nullable`] for more details. + pub fn nullable(&self) -> bool { + self.inner.nullable() + } + + /// Returns custom result ordering introduced by this window function + /// which is used to update ordering equivalences. + /// + /// See [`WindowUDFImpl::sort_options`] for more details. + pub fn sort_options(&self) -> Option { + self.inner.sort_options() + } } impl From for WindowUDF @@ -319,6 +335,24 @@ pub trait WindowUDFImpl: Debug + Send + Sync { self.signature().hash(hasher); hasher.finish() } + + /// Allows customizing nullable of column for this window UDF. + /// + /// By default, the final result of evaluating the window UDF is + /// allowed to have null values. But if that is not the case then + /// it can be customized in the window UDF implementation. + fn nullable(&self) -> bool { + true + } + + /// Allows the window UDF to define a custom result ordering. + /// + /// By default, a window UDF doesn't introduce an ordering. 
+ /// But when specified by a window UDF this is used to update + /// ordering equivalences. + fn sort_options(&self) -> Option { + None + } } /// WindowUDF that adds an alias to the underlying function. It is better to diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index 5e81464d39c25..a80718147c3a4 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -19,14 +19,6 @@ use datafusion_common::ScalarValue; use crate::{expr::WindowFunction, BuiltInWindowFunction, Expr, Literal}; -/// Create an expression to represent the `row_number` window function -pub fn row_number() -> Expr { - Expr::WindowFunction(WindowFunction::new( - BuiltInWindowFunction::RowNumber, - vec![], - )) -} - /// Create an expression to represent the `rank` window function pub fn rank() -> Expr { Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Rank, vec![])) diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml new file mode 100644 index 0000000000000..94dd421284fd6 --- /dev/null +++ b/datafusion/functions-window/Cargo.toml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-functions-window" +description = "Window function packages for the DataFusion query engine" +keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_functions_window" +path = "src/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-expr-common = { workspace = true } +log = { workspace = true } + +[dev-dependencies] +arrow = { workspace = true } diff --git a/datafusion/functions-window/README.md b/datafusion/functions-window/README.md new file mode 100644 index 0000000000000..18590983ca473 --- /dev/null +++ b/datafusion/functions-window/README.md @@ -0,0 +1,26 @@ + + +# DataFusion Window Function Library + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate contains user-defined window functions. 
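A minimal usage sketch of the crate added here (it only relies on the `expr_fn::row_number` helper and the `Expr` type introduced in this patch; how the resulting expression is attached to a query, e.g. via the DataFrame or SQL layer, is outside this crate and not shown):

```rust
use datafusion_expr::Expr;
use datafusion_functions_window::expr_fn::row_number;

/// Build a `row_number()` window expression; the UDWF declares a
/// zero-argument signature, so the argument list is empty.
fn row_number_expr() -> Expr {
    row_number(vec![])
}
```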
+ +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs new file mode 100644 index 0000000000000..790a500f1f3f4 --- /dev/null +++ b/datafusion/functions-window/src/lib.rs @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Window Function packages for [DataFusion]. +//! +//! This crate contains a collection of various window function packages for DataFusion, +//! implemented using the extension API. +//! +//! [DataFusion]: https://crates.io/crates/datafusion +//! +use std::sync::Arc; + +use log::debug; + +use datafusion_expr::registry::FunctionRegistry; +use datafusion_expr::WindowUDF; + +pub mod row_number; + +/// Fluent-style API for creating `Expr`s +pub mod expr_fn { + pub use super::row_number::row_number; +} + +/// Returns all default window functions +pub fn all_default_window_functions() -> Vec> { + vec![row_number::row_number_udwf()] +} +/// Registers all enabled packages with a [`FunctionRegistry`] +pub fn register_all( + registry: &mut dyn FunctionRegistry, +) -> datafusion_common::Result<()> { + let functions: Vec> = all_default_window_functions(); + + functions.into_iter().try_for_each(|fun| { + let existing_udwf = registry.register_udwf(fun)?; + if let Some(existing_udwf) = existing_udwf { + debug!("Overwrite existing UDWF: {}", existing_udwf.name()); + } + Ok(()) as datafusion_common::Result<()> + })?; + + Ok(()) +} diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs new file mode 100644 index 0000000000000..08d711182434e --- /dev/null +++ b/datafusion/functions-window/src/row_number.rs @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Defines physical expression for `row_number` that can be evaluated at runtime during query execution + +use std::any::Any; +use std::fmt::Debug; +use std::ops::Range; + +use datafusion_common::arrow::array::ArrayRef; +use datafusion_common::arrow::array::UInt64Array; +use datafusion_common::arrow::compute::SortOptions; +use datafusion_common::arrow::datatypes::DataType; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::expr::WindowFunction; +use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; + +/// Create a [`WindowFunction`](Expr::WindowFunction) expression for +/// `row_number` user-defined window function. +pub fn row_number(args: Vec<Expr>) -> Expr { + Expr::WindowFunction(WindowFunction::new(row_number_udwf(), args)) +} + +/// Singleton instance of `row_number`, ensures the UDWF is only created once. +#[allow(non_upper_case_globals)] +static STATIC_RowNumber: std::sync::OnceLock<std::sync::Arc<datafusion_expr::WindowUDF>> = + std::sync::OnceLock::new(); + +/// Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for `row_number` +/// user-defined window function. +pub fn row_number_udwf() -> std::sync::Arc<datafusion_expr::WindowUDF> { + STATIC_RowNumber + .get_or_init(|| { + std::sync::Arc::new(datafusion_expr::WindowUDF::from(RowNumber::default())) + }) + .clone() +} + +/// row_number expression +#[derive(Debug)] +struct RowNumber { + signature: Signature, +} + +impl RowNumber { + /// Create a new `row_number` function + fn new() -> Self { + Self { + signature: Signature::any(0, Volatility::Immutable), + } + } +} + +impl Default for RowNumber { + fn default() -> Self { + Self::new() + } +} + +impl WindowUDFImpl for RowNumber { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "row_number" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { + Ok(DataType::UInt64) + } + + fn partition_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> { + Ok(Box::<NumRowsEvaluator>::default()) + } + + fn nullable(&self) -> bool { + false + } + + fn sort_options(&self) -> Option<SortOptions> { + Some(SortOptions { + descending: false, + nulls_first: false, + }) + } +} + +/// State for the `row_number` built-in window function. +#[derive(Debug, Default)] +struct NumRowsEvaluator { + n_rows: usize, +} + +impl PartitionEvaluator for NumRowsEvaluator { + fn is_causal(&self) -> bool { + // The row_number function doesn't need "future" values to emit results: + true + } + + fn evaluate_all( + &mut self, + _values: &[ArrayRef], + num_rows: usize, + ) -> Result<ArrayRef> { + Ok(std::sync::Arc::new(UInt64Array::from_iter_values( + 1..(num_rows as u64) + 1, + ))) + } + + fn evaluate( + &mut self, + _values: &[ArrayRef], + _range: &Range<usize>, + ) -> Result<ScalarValue> { + self.n_rows += 1; + Ok(ScalarValue::UInt64(Some(self.n_rows as u64))) + } + + fn supports_bounded_execution(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datafusion_common::arrow::array::{Array, BooleanArray}; + use datafusion_common::cast::as_uint64_array; + + use super::*; + + #[test] + fn row_number_all_null() -> Result<()> { + let values: ArrayRef = Arc::new(BooleanArray::from(vec![ + None, None, None, None, None, None, None, None, + ])); + let num_rows = values.len(); + + let actual = RowNumber::default() + .partition_evaluator()?
+ .evaluate_all(&[values], num_rows)?; + let actual = as_uint64_array(&actual)?; + + assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); + Ok(()) + } + + #[test] + fn row_number_all_values() -> Result<()> { + let values: ArrayRef = Arc::new(BooleanArray::from(vec![ + true, false, true, false, false, true, false, true, + ])); + let num_rows = values.len(); + + let actual = RowNumber::default() + .partition_evaluator()? + .evaluate_all(&[values], num_rows)?; + let actual = as_uint64_array(&actual)?; + + assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 9e65889d87583..87d8f04a6858a 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -40,7 +40,6 @@ pub use crate::window::lead_lag::{lag, lead, WindowShift}; pub use crate::window::nth_value::NthValue; pub use crate::window::ntile::Ntile; pub use crate::window::rank::{dense_rank, percent_rank, rank, Rank, RankType}; -pub use crate::window::row_number::RowNumber; pub use crate::PhysicalSortExpr; pub use binary::{binary, BinaryExpr}; diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs index 644edae36c9ca..2aeb053331027 100644 --- a/datafusion/physical-expr/src/window/mod.rs +++ b/datafusion/physical-expr/src/window/mod.rs @@ -23,7 +23,6 @@ pub(crate) mod lead_lag; pub(crate) mod nth_value; pub(crate) mod ntile; pub(crate) mod rank; -pub(crate) mod row_number; mod sliding_aggregate; mod window_expr; diff --git a/datafusion/physical-expr/src/window/row_number.rs b/datafusion/physical-expr/src/window/row_number.rs deleted file mode 100644 index 0a1255018d309..0000000000000 --- a/datafusion/physical-expr/src/window/row_number.rs +++ /dev/null @@ -1,166 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Defines physical expression for `row_number` that can evaluated at runtime during query execution - -use crate::expressions::Column; -use crate::window::window_expr::NumRowsState; -use crate::window::BuiltInWindowFunctionExpr; -use crate::{PhysicalExpr, PhysicalSortExpr}; - -use arrow::array::{ArrayRef, UInt64Array}; -use arrow::datatypes::{DataType, Field}; -use arrow_schema::{SchemaRef, SortOptions}; -use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::PartitionEvaluator; - -use std::any::Any; -use std::ops::Range; -use std::sync::Arc; - -/// row_number expression -#[derive(Debug)] -pub struct RowNumber { - name: String, - /// Output data type - data_type: DataType, -} - -impl RowNumber { - /// Create a new ROW_NUMBER function - pub fn new(name: impl Into, data_type: &DataType) -> Self { - Self { - name: name.into(), - data_type: data_type.clone(), - } - } -} - -impl BuiltInWindowFunctionExpr for RowNumber { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - fn field(&self) -> Result { - let nullable = false; - Ok(Field::new(self.name(), self.data_type.clone(), nullable)) - } - - fn expressions(&self) -> Vec> { - vec![] - } - - fn name(&self) -> &str { - &self.name - } - - fn get_result_ordering(&self, schema: &SchemaRef) -> Option { - // The built-in ROW_NUMBER window function introduces a new ordering: - schema.column_with_name(self.name()).map(|(idx, field)| { - let expr = Arc::new(Column::new(field.name(), idx)); - let options = SortOptions { - descending: false, - nulls_first: false, - }; // ASC, NULLS LAST - PhysicalSortExpr { expr, options } - }) - } - - fn create_evaluator(&self) -> Result> { - Ok(Box::::default()) - } -} - -#[derive(Default, Debug)] -pub(crate) struct NumRowsEvaluator { - state: NumRowsState, -} - -impl PartitionEvaluator for NumRowsEvaluator { - fn is_causal(&self) -> bool { - // The ROW_NUMBER function doesn't need "future" values to emit results: - true - } - - /// evaluate window function result inside given range - fn evaluate( - &mut self, - _values: &[ArrayRef], - _range: &Range, - ) -> Result { - self.state.n_rows += 1; - Ok(ScalarValue::UInt64(Some(self.state.n_rows as u64))) - } - - fn evaluate_all( - &mut self, - _values: &[ArrayRef], - num_rows: usize, - ) -> Result { - Ok(Arc::new(UInt64Array::from_iter_values( - 1..(num_rows as u64) + 1, - ))) - } - - fn supports_bounded_execution(&self) -> bool { - true - } -} - -#[cfg(test)] -mod tests { - use super::*; - use arrow::{array::*, datatypes::*}; - use datafusion_common::cast::as_uint64_array; - - #[test] - fn row_number_all_null() -> Result<()> { - let arr: ArrayRef = Arc::new(BooleanArray::from(vec![ - None, None, None, None, None, None, None, None, - ])); - let schema = Schema::new(vec![Field::new("arr", DataType::Boolean, true)]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![arr])?; - let row_number = RowNumber::new("row_number".to_owned(), &DataType::UInt64); - let values = row_number.evaluate_args(&batch)?; - let result = row_number - .create_evaluator()? 
- .evaluate_all(&values, batch.num_rows())?; - let result = as_uint64_array(&result)?; - let result = result.values(); - assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *result); - Ok(()) - } - - #[test] - fn row_number_all_values() -> Result<()> { - let arr: ArrayRef = Arc::new(BooleanArray::from(vec![ - true, false, true, false, false, true, false, true, - ])); - let schema = Schema::new(vec![Field::new("arr", DataType::Boolean, false)]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![arr])?; - let row_number = RowNumber::new("row_number".to_owned(), &DataType::UInt64); - let values = row_number.evaluate_args(&batch)?; - let result = row_number - .create_evaluator()? - .evaluate_all(&values, batch.num_rows())?; - let result = as_uint64_array(&result)?; - let result = result.values(); - assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *result); - Ok(()) - } -} diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 7020f7f5cf830..8f6f78df8cb85 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -543,12 +543,6 @@ pub struct RankState { pub n_rank: usize, } -/// State for the 'ROW_NUMBER' built-in window function. -#[derive(Debug, Clone, Default)] -pub struct NumRowsState { - pub n_rows: usize, -} - /// Tag to differentiate special use cases of the NTH_VALUE built-in window function. #[derive(Debug, Copy, Clone)] pub enum NthValueKind { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 03090faf3efdc..154beb79f729e 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use crate::{ expressions::{ cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, - PhysicalSortExpr, RowNumber, + PhysicalSortExpr, }, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, }; @@ -49,6 +49,7 @@ mod bounded_window_agg_exec; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; +use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, }; @@ -218,7 +219,6 @@ fn create_built_in_window_expr( let out_data_type: &DataType = input_schema.field_with_name(&name)?.data_type(); Ok(match fun { - BuiltInWindowFunction::RowNumber => Arc::new(RowNumber::new(name, out_data_type)), BuiltInWindowFunction::Rank => Arc::new(rank(name, out_data_type)), BuiltInWindowFunction::DenseRank => Arc::new(dense_rank(name, out_data_type)), BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name, out_data_type)), @@ -356,8 +356,11 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { - let nullable = true; - Ok(Field::new(&self.name, self.data_type.clone(), nullable)) + Ok(Field::new( + &self.name, + self.data_type.clone(), + self.fun.nullable(), + )) } fn expressions(&self) -> Vec> { @@ -375,6 +378,16 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { fn reverse_expr(&self) -> Option> { None } + + fn get_result_ordering(&self, schema: &SchemaRef) -> Option { + self.fun + .sort_options() + .zip(schema.column_with_name(self.name())) + .map(|(options, (idx, field))| { + let expr = Arc::new(Column::new(field.name(), idx)); + PhysicalSortExpr { expr, options } + }) + } } pub(crate) fn calc_requirements< diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto index 819130b08e861..acf540d444656 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -481,7 +481,8 @@ message ScalarUDFExprNode { } enum BuiltInWindowFunction { - ROW_NUMBER = 0; + UNSPECIFIED = 0; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum + // ROW_NUMBER = 0; RANK = 1; DENSE_RANK = 2; PERCENT_RANK = 3; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 521a0d90c1ed6..489b6c67534f2 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -1659,7 +1659,7 @@ impl serde::Serialize for BuiltInWindowFunction { S: serde::Serializer, { let variant = match self { - Self::RowNumber => "ROW_NUMBER", + Self::Unspecified => "UNSPECIFIED", Self::Rank => "RANK", Self::DenseRank => "DENSE_RANK", Self::PercentRank => "PERCENT_RANK", @@ -1681,7 +1681,7 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction { D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "ROW_NUMBER", + "UNSPECIFIED", "RANK", "DENSE_RANK", "PERCENT_RANK", @@ -1732,7 +1732,7 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction { E: serde::de::Error, { match value { - "ROW_NUMBER" => Ok(BuiltInWindowFunction::RowNumber), + "UNSPECIFIED" => Ok(BuiltInWindowFunction::Unspecified), "RANK" => Ok(BuiltInWindowFunction::Rank), "DENSE_RANK" => Ok(BuiltInWindowFunction::DenseRank), "PERCENT_RANK" => Ok(BuiltInWindowFunction::PercentRank), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 070c9b31d3d48..c98c950d35f90 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1919,7 +1919,9 @@ pub struct PartitionStats { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum BuiltInWindowFunction { - RowNumber = 0, + /// + Unspecified = 0, + /// ROW_NUMBER = 0; Rank = 1, DenseRank = 2, PercentRank = 3, @@ -1938,7 +1940,7 @@ impl BuiltInWindowFunction { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - BuiltInWindowFunction::RowNumber => "ROW_NUMBER", + BuiltInWindowFunction::Unspecified => "UNSPECIFIED", BuiltInWindowFunction::Rank => "RANK", BuiltInWindowFunction::DenseRank => "DENSE_RANK", BuiltInWindowFunction::PercentRank => "PERCENT_RANK", @@ -1954,7 +1956,7 @@ impl BuiltInWindowFunction { /// Creates an enum from field names used in the ProtoBuf definition. 
pub fn from_str_name(value: &str) -> ::core::option::Option { match value { - "ROW_NUMBER" => Some(Self::RowNumber), + "UNSPECIFIED" => Some(Self::Unspecified), "RANK" => Some(Self::Rank), "DENSE_RANK" => Some(Self::DenseRank), "PERCENT_RANK" => Some(Self::PercentRank), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 6cbea5f0cfcce..b74237b5281b8 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -141,7 +141,7 @@ impl From<&protobuf::StringifiedPlan> for StringifiedPlan { impl From for BuiltInWindowFunction { fn from(built_in_function: protobuf::BuiltInWindowFunction) -> Self { match built_in_function { - protobuf::BuiltInWindowFunction::RowNumber => Self::RowNumber, + protobuf::BuiltInWindowFunction::Unspecified => todo!(), protobuf::BuiltInWindowFunction::Rank => Self::Rank, protobuf::BuiltInWindowFunction::PercentRank => Self::PercentRank, protobuf::BuiltInWindowFunction::DenseRank => Self::DenseRank, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index c7361c89c328c..bb7bf84a33874 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -120,7 +120,6 @@ impl From<&BuiltInWindowFunction> for protobuf::BuiltInWindowFunction { BuiltInWindowFunction::Ntile => Self::Ntile, BuiltInWindowFunction::CumeDist => Self::CumeDist, BuiltInWindowFunction::PercentRank => Self::PercentRank, - BuiltInWindowFunction::RowNumber => Self::RowNumber, BuiltInWindowFunction::Rank => Self::Rank, BuiltInWindowFunction::Lag => Self::Lag, BuiltInWindowFunction::Lead => Self::Lead, diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 57cd22a99ae1b..7949a457f40f3 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -25,7 +25,7 @@ use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, InListExpr, IsNotNullExpr, IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, Ntile, Rank, RankType, - RowNumber, TryCastExpr, WindowShift, + TryCastExpr, WindowShift, }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; @@ -117,9 +117,8 @@ pub fn serialize_physical_window_expr( let expr = built_in_window_expr.get_built_in_func_expr(); let built_in_fn_expr = expr.as_any(); - let builtin_fn = if built_in_fn_expr.downcast_ref::().is_some() { - protobuf::BuiltInWindowFunction::RowNumber - } else if let Some(rank_expr) = built_in_fn_expr.downcast_ref::() { + let builtin_fn = if let Some(rank_expr) = built_in_fn_expr.downcast_ref::() + { match rank_expr.get_type() { RankType::Basic => protobuf::BuiltInWindowFunction::Rank, RankType::Dense => protobuf::BuiltInWindowFunction::DenseRank, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index eb7cc5c4b9c5f..4b20f14ddeb4c 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -47,6 +47,7 @@ use datafusion::functions_aggregate::expr_fn::{ }; use datafusion::functions_aggregate::min_max::max_udaf; use datafusion::functions_nested::map::map; +use 
datafusion::functions_window::row_number::row_number; use datafusion::prelude::*; use datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::config::TableOptions; @@ -903,6 +904,7 @@ async fn roundtrip_expr_api() -> Result<()> { vec![lit(1), lit(2), lit(3)], vec![lit(10), lit(20), lit(30)], ), + row_number(vec![col("a")]), ]; // ensure expressions created with the expr api can be round tripped diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index c4ae3a8134a6b..5c4b83fe38e11 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -55,6 +55,7 @@ strum = { version = "0.26.1", features = ["derive"] } ctor = { workspace = true } datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } +datafusion-functions-window = { workspace = true } env_logger = { workspace = true } paste = "^1.0" rstest = { workspace = true } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 39511ea4d03ac..c941d7098dbe0 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1531,6 +1531,7 @@ mod tests { use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; + use datafusion_functions_window::row_number::row_number_udwf; use crate::unparser::dialect::{CustomDialect, CustomDialectBuilder}; @@ -1793,16 +1794,14 @@ mod tests { ), ( Expr::WindowFunction(WindowFunction { - fun: WindowFunctionDefinition::BuiltInWindowFunction( - datafusion_expr::BuiltInWindowFunction::RowNumber, - ), + fun: WindowFunctionDefinition::WindowUDF(row_number_udwf()), args: vec![col("col")], partition_by: vec![], order_by: vec![], window_frame: WindowFrame::new(None), null_treatment: None, }), - r#"ROW_NUMBER(col) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)"#, + r#"row_number(col) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)"#, ), ( Expr::WindowFunction(WindowFunction { diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index cb592fdda0c88..074e4ef834816 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -587,7 +587,7 @@ statement error SELECT v1, v2, SUMM(v2) OVER(ORDER BY v1) from test; # Window function -statement error Did you mean 'ROW_NUMBER'? +statement error Did you mean 'row_number'? 
SELECT v1, v2, ROWNUMBER() OVER(ORDER BY v1) from test; statement ok diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 3d78bd06c30b8..efcc0e5b0f58f 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -3861,8 +3861,8 @@ logical_plan 06)----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10) 07)------------SubqueryAlias: l 08)--------------TableScan: multiple_ordered_table projection=[a, d] -09)------------Projection: r.a, r.d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n -10)--------------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +09)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n +10)--------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 11)----------------SubqueryAlias: r 12)------------------TableScan: multiple_ordered_table projection=[a, d] physical_plan @@ -3871,8 +3871,8 @@ physical_plan 03)----CoalesceBatchesExec: target_batch_size=2 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true -06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] -07)----------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] +07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 08)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true # reset partition number to 8. 
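The expected-plan updates in these sqllogictest files only track the rename from the built-in ROW_NUMBER to the user-defined row_number function in plan text. As a minimal sketch (not code from this patch), the expr-API entry point exercised by roundtrip_expr_api above can be called as shown below; the wrapper function name is an illustrative assumption, while the row_number(vec![col("a")]) call mirrors the one added to the roundtrip test.

// Illustrative sketch only: building the replacement user-defined row_number
// window expression through the expr API.
use datafusion::functions_window::row_number::row_number;
use datafusion::prelude::{col, Expr};

// Hypothetical helper for illustration; mirrors the call added to roundtrip_expr_api.
fn example_row_number_expr() -> Expr {
    row_number(vec![col("a")])
}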
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 441ccb7d99d5b..fa0d4cbc49f87 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3235,8 +3235,8 @@ logical_plan 01)Sort: l_table.rn1 ASC NULLS LAST 02)--Inner Join: l_table.a = r_table.a 03)----SubqueryAlias: l_table -04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 06)----------TableScan: annotated_data projection=[a0, a, b, c, d] 07)----SubqueryAlias: r_table 08)------TableScan: annotated_data projection=[a0, a, b, c, d] @@ -3246,8 +3246,8 @@ physical_plan 03)----CoalesceBatchesExec: target_batch_size=2 04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -07)------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 09)----CoalesceBatchesExec: target_batch_size=2 10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST @@ -3271,8 +3271,8 @@ logical_plan 03)----SubqueryAlias: l_table 04)------TableScan: annotated_data projection=[a0, a, b, c, d] 05)----SubqueryAlias: r_table -06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -07)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING]] +06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 08)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)SortPreservingMergeExec: [rn1@10 ASC NULLS LAST] @@ -3284,8 +3284,8 @@ physical_plan 07)----CoalesceBatchesExec: target_batch_size=2 08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -11)------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 12)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true statement ok @@ -3308,12 +3308,12 @@ logical_plan 01)Sort: l_table.a ASC NULLS FIRST, l_table.b ASC NULLS LAST, l_table.c ASC NULLS LAST, r_table.rn1 ASC NULLS LAST 02)--Inner Join: l_table.a = r_table.a 03)----SubqueryAlias: l_table -04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 06)----------TableScan: annotated_data projection=[a0, a, b, c, d] 07)----SubqueryAlias: r_table -08)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -09)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +08)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, 
annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +09)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 10)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)SortPreservingMergeExec: [a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST] @@ -3323,15 +3323,15 @@ physical_plan 05)--------CoalesceBatchesExec: target_batch_size=2 06)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -08)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -09)----------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +08)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +09)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 11)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true] 12)--------CoalesceBatchesExec: target_batch_size=2 13)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 14)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -15)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -16)----------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +15)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +16)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), 
is_causal: false }], mode=[Sorted] 17)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true statement ok @@ -3358,15 +3358,15 @@ logical_plan 03)----SubqueryAlias: l_table 04)------TableScan: annotated_data projection=[a0, a, b, c, d] 05)----SubqueryAlias: r_table -06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -07)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 08)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)CoalesceBatchesExec: target_batch_size=2 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@1, a@1)] 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true -04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -05)------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true # hash join should propagate ordering equivalence of the right side for RIGHT ANTI join. 
@@ -3385,15 +3385,15 @@ logical_plan 03)----SubqueryAlias: l_table 04)------TableScan: annotated_data projection=[a] 05)----SubqueryAlias: r_table -06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -07)--------WindowAggr: windowExpr=[[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 08)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)CoalesceBatchesExec: target_batch_size=2 02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(a@0, a@1)] 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC], has_header=true -04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -05)------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true query TT @@ -3457,8 +3457,8 @@ logical_plan 06)----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10) 07)------------SubqueryAlias: l 08)--------------TableScan: multiple_ordered_table projection=[a, d] -09)------------Projection: r.a, r.d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n -10)--------------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +09)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n +10)--------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 11)----------------SubqueryAlias: r 12)------------------TableScan: multiple_ordered_table projection=[a, d] physical_plan @@ -3467,8 +3467,8 @@ physical_plan 03)----CoalesceBatchesExec: target_batch_size=2 04)------HashJoinExec: mode=CollectLeft, 
join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true -06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] -07)----------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] +07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 08)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true # run query above in multiple partitions diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index f56ac414a3023..0bf7a8a1eb1ba 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -342,8 +342,8 @@ logical_plan 03)----Aggregate: groupBy=[[d.b]], aggr=[[max(d.a), max(d.seq)]] 04)------SubqueryAlias: d 05)--------SubqueryAlias: _data2 -06)----------Projection: ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS seq, s.a, s.b -07)------------WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +06)----------Projection: row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS seq, s.a, s.b +07)------------WindowAggr: windowExpr=[[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 08)--------------SubqueryAlias: s 09)----------------SubqueryAlias: _sample_data 10)------------------Union @@ -359,8 +359,8 @@ physical_plan 01)SortPreservingMergeExec: [b@0 ASC NULLS LAST] 02)--ProjectionExec: expr=[b@0 as b, max(d.a)@1 as max_a, max(d.seq)@2 as max(d.seq)] 03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[max(d.a), max(d.seq)], ordering_mode=Sorted -04)------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] -05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION 
BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------ProjectionExec: expr=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] +05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([b@1], 4), input_partitions=4 @@ -1419,17 +1419,17 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn1, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn2 +01)Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn1, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn2 02)--Limit: skip=0, fetch=5 -03)----WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] -04)------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] +04)------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] 05)--------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2] +01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: 
wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] -05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -1460,18 +1460,18 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Projection: aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum2, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn2 +01)Projection: aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS rn2 02)--Limit: skip=0, fetch=5 03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] -04)------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] +04)------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] 05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 
FOLLOWING]] 06)----------TableScan: aggregate_test_100 projection=[c1, c2, c9] physical_plan -01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] +01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@2 ASC NULLS LAST,c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] -05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 
07)------------SortExec: expr=[c9@2 DESC,c1@0 DESC], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9], has_header=true @@ -1863,13 +1863,13 @@ EXPLAIN SELECT c1, ROW_NUMBER() OVER (PARTITION BY c1) as rn1 FROM aggregate_tes ---- logical_plan 01)Sort: aggregate_test_100.c1 ASC NULLS LAST -02)--Projection: aggregate_test_100.c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -03)----WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +02)--Projection: aggregate_test_100.c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +03)----WindowAggr: windowExpr=[[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 04)------TableScan: aggregate_test_100 projection=[c1] physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST] -02)--ProjectionExec: expr=[c1@0 as c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] +03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -1992,13 +1992,13 @@ EXPLAIN SELECT c1, ROW_NUMBER() OVER (PARTITION BY c1) as rn1 FROM aggregate_tes ---- logical_plan 01)Sort: aggregate_test_100.c1 ASC NULLS LAST -02)--Projection: aggregate_test_100.c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 -03)----WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +02)--Projection: aggregate_test_100.c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1 +03)----WindowAggr: windowExpr=[[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 04)------TableScan: aggregate_test_100 projection=[c1] physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST,rn1@1 ASC NULLS LAST] 
-02)--ProjectionExec: expr=[c1@0 as c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] +03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -2249,13 +2249,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, fetch=5 03)----Sort: aggregate_test_100.c9 ASC NULLS LAST -04)------Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2288,13 +2288,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, fetch=5 03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2327,13 +2327,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 DESC NULLS FIRST, fetch=5 03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC 
NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 DESC], preserve_partitioning=[false] -02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2369,13 +2369,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST, fetch=5 03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] -02)--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW@1 as rn1] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2422,13 +2422,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2446,13 +2446,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST, fetch=5 03)----Sort: CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c5, aggregate_test_100.c9, 
ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c5, aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c5, c9] physical_plan -01)ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] +01)ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[CAST(c9@1 AS Int32) + c5@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5, c9], has_header=true @@ -2469,13 +2469,13 @@ logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: rn1 ASC NULLS LAST, fetch=5 03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, CAST(ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS Int64) AS rn1 -05)--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS Int64) AS rn1 +05)--------WindowAggr: 
windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, CAST(ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 AS Int64) as rn1] +01)ProjectionExec: expr=[c9@0 as c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 AS Int64) as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2661,14 +2661,14 @@ EXPLAIN SELECT logical_plan 01)Limit: skip=0, fetch=5 02)--Sort: annotated_data_finite.ts DESC NULLS FIRST, fetch=5 -03)----Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS 
dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 -04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, 
LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +03)----Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 +04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 05)--------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS 
FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 06)----------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] -02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 
PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] -03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 
FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] 
RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] +03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: 
"LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, 
last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true @@ -3124,15 +3124,15 @@ logical_plan 02)--Filter: rn1 < UInt64(50) 03)----Limit: skip=0, fetch=5 04)------Sort: rn1 ASC NULLS LAST, fetch=5 -05)--------Projection: annotated_data_infinite2.a0, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -06)----------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +05)--------Projection: annotated_data_infinite2.a0, annotated_data_infinite2.a, annotated_data_infinite2.b, 
annotated_data_infinite2.c, annotated_data_infinite2.d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +06)----------WindowAggr: windowExpr=[[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 07)------------TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan 01)CoalesceBatchesExec: target_batch_size=4096 02)--FilterExec: rn1@5 < 50 -03)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] +03)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] 04)------GlobalLimitExec: skip=0, fetch=5 -05)--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 06)----------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] # this is a negative test for asserting that window functions (other than ROW_NUMBER) @@ -4147,13 +4147,13 @@ query TT EXPLAIN select ROW_NUMBER() over (partition by a) from (select * from a where a = 1); ---- logical_plan -01)Projection: ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -02)--WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +01)Projection: row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +02)--WindowAggr: windowExpr=[[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 03)----Filter: a.a = Int64(1) 04)------TableScan: a projection=[a] physical_plan -01)ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] -02)--BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: 
Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +01)ProjectionExec: expr=[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] +02)--BoundedWindowAggExec: wdw=[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 From 48416e57f4671286a292c565682feae53af88a69 Mon Sep 17 00:00:00 2001 From: WeblWabl Date: Sat, 17 Aug 2024 05:45:01 -0500 Subject: [PATCH 325/357] Fix projection name with DataFrame::with_column and window functions (#12000) * fix/11982: resolves projection issue found in with_column window fn usage Signed-off-by: Devan * fix/11982: resolves projection issue found in with_column window fn usage Signed-off-by: Devan * fmt Signed-off-by: Devan * fmt Signed-off-by: Devan * refactor to get tests working Signed-off-by: Devan * change test to use test harness Signed-off-by: Devan * use row_number method and add comment about test Signed-off-by: Devan * add back import Signed-off-by: Devan --------- Signed-off-by: Devan --- datafusion/core/src/dataframe/mod.rs | 44 +++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 3705873ce3bc9..760ebd7392e56 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1441,14 +1441,18 @@ impl DataFrame { /// ``` pub fn with_column(self, name: &str, expr: Expr) -> Result { let window_func_exprs = find_window_exprs(&[expr.clone()]); - let plan = if window_func_exprs.is_empty() { - self.plan + + let (plan, mut col_exists, window_func) = if window_func_exprs.is_empty() { + (self.plan, false, false) } else { - LogicalPlanBuilder::window_plan(self.plan, window_func_exprs)? + ( + LogicalPlanBuilder::window_plan(self.plan, window_func_exprs)?, + true, + true, + ) }; let new_column = expr.alias(name); - let mut col_exists = false; let mut fields: Vec = plan .schema() .iter() @@ -1456,6 +1460,8 @@ impl DataFrame { if field.name() == name { col_exists = true; new_column.clone() + } else if window_func && qualifier.is_none() { + col(Column::from((qualifier, field))).alias(name) } else { col(Column::from((qualifier, field))) } @@ -1704,6 +1710,7 @@ mod tests { use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::expr::WindowFunction; + use datafusion_expr::window_function::row_number; use datafusion_expr::{ cast, create_udf, expr, lit, BuiltInWindowFunction, ExprFunctionExt, ScalarFunctionImplementation, Volatility, WindowFrame, WindowFrameBound, @@ -2956,6 +2963,35 @@ mod tests { Ok(()) } + // Test issue: https://github.com/apache/datafusion/issues/11982 + // Window function was creating unwanted projection when using with_column() method. 
+ #[tokio::test] + async fn test_window_function_with_column() -> Result<()> { + let df = test_table().await?.select_columns(&["c1", "c2", "c3"])?; + let ctx = SessionContext::new(); + let df_impl = DataFrame::new(ctx.state(), df.plan.clone()); + let func = row_number().alias("row_num"); + + // Should create an additional column with alias 'r' that has window func results + let df = df_impl.with_column("r", func)?.limit(0, Some(2))?; + assert_eq!(4, df.schema().fields().len()); + + let df_results = df.clone().collect().await?; + assert_batches_sorted_eq!( + [ + "+----+----+-----+---+", + "| c1 | c2 | c3 | r |", + "+----+----+-----+---+", + "| c | 2 | 1 | 1 |", + "| d | 5 | -40 | 2 |", + "+----+----+-----+---+", + ], + &df_results + ); + + Ok(()) + } + // Test issue: https://github.com/apache/datafusion/issues/7790 // The join operation outputs two identical column names, but they belong to different relations. #[tokio::test] From 7fa7689c41903bd7b4b6e049c60890d0042df03f Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Sat, 17 Aug 2024 11:46:27 +0100 Subject: [PATCH 326/357] Update to `sqlparser-rs` v0.50.0 (#12014) * Support HEAD of sqlparser main * special case ID as a non-keyword when unparsing * fix EXTRACT expresssions * TODO REVERT: comment out failing test Making this commit just to let tests progress. * use sqlparser-rs v0.50.0 --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 4 +- datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sql/src/planner.rs | 3 +- datafusion/sql/src/relation/mod.rs | 1 + datafusion/sql/src/statement.rs | 2 +- datafusion/sql/src/unparser/ast.rs | 5 +- datafusion/sql/src/unparser/dialect.rs | 5 +- datafusion/sql/src/unparser/expr.rs | 1 + datafusion/sql/src/unparser/plan.rs | 2 + datafusion/sql/src/utils.rs | 1 + datafusion/sqllogictest/test_files/expr.slt | 52 +++++++++++++++------ 12 files changed, 59 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dfb70dd1eb7bf..ae344a46a1bd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ rand = "0.8" regex = "1.8" rstest = "0.22.0" serde_json = "1" -sqlparser = { version = "0.49", features = ["visitor"] } +sqlparser = { version = "0.50.0", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index cda57ba9b443a..52e4a000355d7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3542,9 +3542,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 7c94e5ead5c35..035fd3816c6cc 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -178,7 +178,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Value(value) => { self.parse_value(value, planner_context.prepare_param_data_types()) } - SQLExpr::Extract { field, expr } => { + SQLExpr::Extract { field, expr, .. 
} => { let mut extract_args = vec![ Expr::Literal(ScalarValue::from(format!("{field}"))), self.sql_expr_to_logical_expr(*expr, schema, planner_context)?, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index bf7c3fe0be4f6..9ad515087a364 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -438,7 +438,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLDataType::Bytea => Ok(DataType::Binary), SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), - SQLDataType::Struct(fields) => { + SQLDataType::Struct(fields, _) => { let fields = fields .iter() .enumerate() @@ -513,6 +513,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Union(_) | SQLDataType::Nullable(_) | SQLDataType::LowCardinality(_) + | SQLDataType::Trigger => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 5d7b3d5918d3f..50027a0efd845 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -36,6 +36,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if let Some(func_args) = args { let tbl_func_name = name.0.first().unwrap().value.to_string(); let args = func_args + .args .into_iter() .flat_map(|arg| { if let FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) = arg diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 6d47232ec2700..e75a96e78d483 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -198,8 +198,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match statement { Statement::ExplainTable { describe_alias: DescribeAlias::Describe, // only parse 'DESCRIBE table_name' and not 'EXPLAIN table_name' - hive_format: _, table_name, + .. 
} => self.describe_table_to_plan(table_name), Statement::Explain { verbose, diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index c10db9831457b..71ff712985cdb 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -428,7 +428,10 @@ impl TableRelationBuilder { None => return Err(Into::into(UninitializedFieldError::from("name"))), }, alias: self.alias.clone(), - args: self.args.clone(), + args: self.args.clone().map(|args| ast::TableFunctionArgs { + args, + settings: None, + }), with_hints: self.with_hints.clone(), version: self.version.clone(), partitions: self.partitions.clone(), diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 7eca326386fc5..74f154d7870f5 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -131,7 +131,10 @@ pub struct DefaultDialect {} impl Dialect for DefaultDialect { fn identifier_quote_style(&self, identifier: &str) -> Option { let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); - if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str()) + let id_upper = identifier.to_uppercase(); + // special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382 + // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here + if (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str())) || !identifier_regex.is_match(identifier) { Some('"') diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index c941d7098dbe0..9ce627aecc760 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -592,6 +592,7 @@ impl Unparser<'_> { return Some(ast::Expr::Extract { field, expr: Box::new(date_expr), + syntax: ast::ExtractSyntax::From, }); } } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 024f33fb2c7df..8b5a5b0942b8f 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -403,6 +403,7 @@ impl Unparser<'_> { let ast_join = ast::Join { relation, + global: false, join_operator: self .join_operator_to_sql(join.join_type, join_constraint), }; @@ -435,6 +436,7 @@ impl Unparser<'_> { let ast_join = ast::Join { relation, + global: false, join_operator: self.join_operator_to_sql( JoinType::Inner, ast::JoinConstraint::On(ast::Expr::Value(ast::Value::Boolean( diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 5cdc546e02675..af161bba45c14 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -268,6 +268,7 @@ pub(crate) fn value_to_string(value: &Value) -> Option { Value::SingleQuotedString(s) => Some(s.to_string()), Value::DollarQuotedString(s) => Some(s.to_string()), Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()), + Value::UnicodeStringLiteral(s) => Some(s.to_string()), Value::DoubleQuotedString(_) | Value::EscapedStringLiteral(_) | Value::NationalStringLiteral(_) diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 3c3b0631e3ff7..81ae60f3ba93c 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -848,8 +848,10 @@ SELECT EXTRACT("year" FROM timestamp '2020-09-08T12:00:00+00:00') ---- 2020 -query error +query R SELECT EXTRACT('year' FROM timestamp '2020-09-08T12:00:00+00:00') +---- +2020 query R SELECT date_part('QUARTER', 
CAST('2000-01-01' AS DATE)) @@ -866,8 +868,10 @@ SELECT EXTRACT("quarter" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 3 -query error +query R SELECT EXTRACT('quarter' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +3 query R SELECT date_part('MONTH', CAST('2000-01-01' AS DATE)) @@ -884,8 +888,10 @@ SELECT EXTRACT("month" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 9 -query error +query R SELECT EXTRACT('month' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +9 query R SELECT date_part('WEEK', CAST('2003-01-01' AS DATE)) @@ -902,8 +908,10 @@ SELECT EXTRACT("WEEK" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 37 -query error +query R SELECT EXTRACT('WEEK' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +37 query R SELECT date_part('DAY', CAST('2000-01-01' AS DATE)) @@ -920,8 +928,10 @@ SELECT EXTRACT("day" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 8 -query error +query R SELECT EXTRACT('day' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +8 query R SELECT date_part('DOY', CAST('2000-01-01' AS DATE)) @@ -938,8 +948,10 @@ SELECT EXTRACT("doy" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 252 -query error +query R SELECT EXTRACT('doy' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +252 query R SELECT date_part('DOW', CAST('2000-01-01' AS DATE)) @@ -956,8 +968,10 @@ SELECT EXTRACT("dow" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 2 -query error +query R SELECT EXTRACT('dow' FROM to_timestamp('2020-09-08T12:00:00+00:00')) +---- +2 query R SELECT date_part('HOUR', CAST('2000-01-01' AS DATE)) @@ -974,8 +988,10 @@ SELECT EXTRACT("hour" FROM to_timestamp('2020-09-08T12:03:03+00:00')) ---- 12 -query error +query R SELECT EXTRACT('hour' FROM to_timestamp('2020-09-08T12:03:03+00:00')) +---- +12 query R SELECT EXTRACT(minute FROM to_timestamp('2020-09-08T12:12:00+00:00')) @@ -987,8 +1003,10 @@ SELECT EXTRACT("minute" FROM to_timestamp('2020-09-08T12:12:00+00:00')) ---- 12 -query error +query R SELECT EXTRACT('minute' FROM to_timestamp('2020-09-08T12:12:00+00:00')) +---- +12 query R SELECT date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00')) @@ -1035,17 +1053,25 @@ SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- 12123456780 -query error +query R SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12.12345678 -query error +query R SELECT EXTRACT('millisecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12123.45678 -query error +query R SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12123456.78 -query error +query R SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12123456780 # Keep precision when coercing Utf8 to Timestamp query R From 186ba4c3ae642ac470ff26142e89051e9b333dc9 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Sat, 17 Aug 2024 03:46:55 -0700 Subject: [PATCH 327/357] Minor: make some physical-plan properties public (#12022) * Minor: make some physical-plan properties public * add Default for GroupOrderingFull * make groups and null_expr private again * remove pub label --- datafusion/expr/src/logical_plan/builder.rs | 2 +- datafusion/physical-plan/src/aggregates/mod.rs | 14 +++++++------- .../physical-plan/src/aggregates/order/full.rs | 8 +++++++- .../physical-plan/src/aggregates/order/mod.rs | 8 ++++---- .../physical-plan/src/aggregates/order/partial.rs | 2 +- datafusion/physical-plan/src/filter.rs | 2 +- 6 files changed, 21 
insertions(+), 15 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2e53a682854ce..f9769560b2512 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1288,7 +1288,7 @@ pub fn build_join_schema( /// /// This allows MySQL style selects like /// `SELECT col FROM t WHERE pk = 5` if col is unique -fn add_group_by_exprs_from_dependencies( +pub fn add_group_by_exprs_from_dependencies( mut group_expr: Vec, schema: &DFSchemaRef, ) -> Result> { diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4d39eff42b5f4..89d4c452cca65 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -48,9 +48,9 @@ use datafusion_physical_expr::{ use itertools::Itertools; -mod group_values; +pub mod group_values; mod no_grouping; -mod order; +pub mod order; mod row_hash; mod topk; mod topk_stream; @@ -925,7 +925,7 @@ pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { /// /// A `LexRequirement` instance, which is the requirement that satisfies all the /// aggregate requirements. Returns an error in case of conflicting requirements. -fn get_finer_aggregate_exprs_requirement( +pub fn get_finer_aggregate_exprs_requirement( aggr_exprs: &mut [Arc], group_by: &PhysicalGroupBy, eq_properties: &EquivalenceProperties, @@ -998,7 +998,7 @@ fn get_finer_aggregate_exprs_requirement( /// The expressions are different depending on `mode`: /// * Partial: AggregateExpr::expressions /// * Final: columns of `AggregateExpr::state_fields()` -fn aggregate_expressions( +pub fn aggregate_expressions( aggr_expr: &[Arc], mode: &AggregateMode, col_idx_base: usize, @@ -1051,9 +1051,9 @@ fn merge_expressions( }) } -pub(crate) type AccumulatorItem = Box; +pub type AccumulatorItem = Box; -fn create_accumulators( +pub fn create_accumulators( aggr_expr: &[Arc], ) -> Result> { aggr_expr @@ -1064,7 +1064,7 @@ fn create_accumulators( /// returns a vector of ArrayRefs, where each entry corresponds to either the /// final value (mode = Final, FinalPartitioned and Single) or states (mode = Partial) -fn finalize_aggregation( +pub fn finalize_aggregation( accumulators: &mut [AccumulatorItem], mode: &AggregateMode, ) -> Result> { diff --git a/datafusion/physical-plan/src/aggregates/order/full.rs b/datafusion/physical-plan/src/aggregates/order/full.rs index c15538e8ab8ef..e86d7677479aa 100644 --- a/datafusion/physical-plan/src/aggregates/order/full.rs +++ b/datafusion/physical-plan/src/aggregates/order/full.rs @@ -54,7 +54,7 @@ use datafusion_expr::EmitTo; /// `0..12` can be emitted. Note that `13` can not yet be emitted as /// there may be more values in the next batch with the same group_id. 
#[derive(Debug)] -pub(crate) struct GroupOrderingFull { +pub struct GroupOrderingFull { state: State, } @@ -142,3 +142,9 @@ impl GroupOrderingFull { std::mem::size_of::() } } + +impl Default for GroupOrderingFull { + fn default() -> Self { + Self::new() + } +} diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index 1d94d56df1383..483150ee61af6 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -25,12 +25,12 @@ mod full; mod partial; use crate::InputOrderMode; -pub(crate) use full::GroupOrderingFull; -pub(crate) use partial::GroupOrderingPartial; +pub use full::GroupOrderingFull; +pub use partial::GroupOrderingPartial; /// Ordering information for each group in the hash table #[derive(Debug)] -pub(crate) enum GroupOrdering { +pub enum GroupOrdering { /// Groups are not ordered None, /// Groups are ordered by some pre-set of the group keys @@ -117,7 +117,7 @@ impl GroupOrdering { } /// Return the size of memory used by the ordering state, in bytes - pub(crate) fn size(&self) -> usize { + pub fn size(&self) -> usize { std::mem::size_of::() + match self { GroupOrdering::None => 0, diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index f8fd86ff8b50a..73a157f3aa966 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -60,7 +60,7 @@ use std::sync::Arc; /// order) recent group index ///``` #[derive(Debug)] -pub(crate) struct GroupOrderingPartial { +pub struct GroupOrderingPartial { /// State machine state: State, diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 568987b147980..6aba3d8177104 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -347,7 +347,7 @@ struct FilterExecStream { baseline_metrics: BaselineMetrics, } -pub(crate) fn batch_filter( +pub fn batch_filter( batch: &RecordBatch, predicate: &Arc, ) -> Result { From 1f90b0060baf94d1b5d7388718c26ae53d08c080 Mon Sep 17 00:00:00 2001 From: Cancai Cai <77189278+caicancai@users.noreply.github.com> Date: Sat, 17 Aug 2024 18:48:24 +0800 Subject: [PATCH 328/357] chore: improve variable naming conventions (#12042) --- datafusion-examples/examples/catalog.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs index f770056026ed4..8c2b1aad56c64 100644 --- a/datafusion-examples/examples/catalog.rs +++ b/datafusion-examples/examples/catalog.rs @@ -46,11 +46,11 @@ async fn main() -> Result<()> { let ctx = SessionContext::new(); let state = ctx.state(); - let catlist = Arc::new(CustomCatalogProviderList::new()); + let cataloglist = Arc::new(CustomCatalogProviderList::new()); // use our custom catalog list for context. each context has a single catalog list. 
// context will by default have [`MemoryCatalogProviderList`] - ctx.register_catalog_list(catlist.clone()); + ctx.register_catalog_list(cataloglist.clone()); // initialize our catalog and schemas let catalog = DirCatalog::new(); @@ -81,7 +81,7 @@ async fn main() -> Result<()> { ctx.register_catalog("dircat", Arc::new(catalog)); { // catalog was passed down into our custom catalog list since we override the ctx's default - let catalogs = catlist.catalogs.read().unwrap(); + let catalogs = cataloglist.catalogs.read().unwrap(); assert!(catalogs.contains_key("dircat")); }; @@ -143,8 +143,8 @@ impl DirSchema { async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result> { let DirSchemaOpts { ext, dir, format } = opts; let mut tables = HashMap::new(); - let listdir = std::fs::read_dir(dir).unwrap(); - for res in listdir { + let direntries = std::fs::read_dir(dir).unwrap(); + for res in direntries { let entry = res.unwrap(); let filename = entry.file_name().to_str().unwrap().to_string(); if !filename.ends_with(ext) { From e84f3436c079a8264bf3052f64db31ba6fbddbff Mon Sep 17 00:00:00 2001 From: HuSen Date: Sat, 17 Aug 2024 18:48:55 +0800 Subject: [PATCH 329/357] Fix: handle NULL input for regex match operations (#12028) --- .../physical-expr/src/expressions/binary.rs | 12 +++-- datafusion/sqllogictest/test_files/regexp.slt | 45 +++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 06f54481a6faf..26885ae1350ce 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -318,10 +318,14 @@ impl PhysicalExpr for BinaryExpr { // Attempt to use special kernels if one input is scalar and the other is an array let scalar_result = match (&lhs, &rhs) { (ColumnarValue::Array(array), ColumnarValue::Scalar(scalar)) => { - // if left is array and right is literal - use scalar operations - self.evaluate_array_scalar(array, scalar.clone())?.map(|r| { - r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) - }) + // if left is array and right is literal(not NULL) - use scalar operations + if scalar.is_null() { + None + } else { + self.evaluate_array_scalar(array, scalar.clone())?.map(|r| { + r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) + }) + } } (_, _) => None, // default to array implementation }; diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index c04021651a507..1685ed51afef9 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -48,6 +48,51 @@ true true true +query B +SELECT str ~ NULL FROM t; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query B +select str ~ right('foo', NULL) FROM t; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query B +select right('foo', NULL) !~ str FROM t; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL + query B SELECT regexp_like('foobarbequebaz', ''); ---- From cb1e3f0e5cfd04195ae0c933b74f3eb3fcbe1b45 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 17 Aug 2024 07:41:19 -0400 Subject: [PATCH 330/357] Fix compilation, change row_number() expr_fn to 0 args (#12043) --- datafusion/core/src/dataframe/mod.rs | 2 +- datafusion/functions-window/src/row_number.rs | 4 ++-- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 2 +- 3 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 760ebd7392e56..8920a89af11ec 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1710,13 +1710,13 @@ mod tests { use datafusion_common::{Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::expr::WindowFunction; - use datafusion_expr::window_function::row_number; use datafusion_expr::{ cast, create_udf, expr, lit, BuiltInWindowFunction, ExprFunctionExt, ScalarFunctionImplementation, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_functions_aggregate::expr_fn::{array_agg, count_distinct}; + use datafusion_functions_window::expr_fn::row_number; use datafusion_physical_expr::expressions::Column; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; use sqlparser::ast::NullTreatment; diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 08d711182434e..aea3d4a59e028 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -31,8 +31,8 @@ use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDF /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. -pub fn row_number(args: Vec) -> Expr { - Expr::WindowFunction(WindowFunction::new(row_number_udwf(), args)) +pub fn row_number() -> Expr { + Expr::WindowFunction(WindowFunction::new(row_number_udwf(), vec![])) } /// Singleton instance of `row_number`, ensures the UDWF is only created once. diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 4b20f14ddeb4c..09c5f0f8bd3d6 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -904,7 +904,7 @@ async fn roundtrip_expr_api() -> Result<()> { vec![lit(1), lit(2), lit(3)], vec![lit(10), lit(20), lit(30)], ), - row_number(vec![col("a")]), + row_number(), ]; // ensure expressions created with the expr api can be round tripped From cd9237f96d60d48711d38b27707ad14896d8e609 Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Sun, 18 Aug 2024 17:49:24 +0700 Subject: [PATCH 331/357] Minor: Remove warning when building datafusion-cli from Dockerfile (#12018) Signed-off-by: Tai Le Manh --- datafusion-cli/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/Dockerfile b/datafusion-cli/Dockerfile index d231da62a2fd4..7adead64db57c 100644 --- a/datafusion-cli/Dockerfile +++ b/datafusion-cli/Dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM rust:1.78-bookworm as builder +FROM rust:1.78-bookworm AS builder COPY . 
/usr/src/datafusion COPY ./datafusion /usr/src/datafusion/datafusion From 950dc73c7f763ae8dc56c0a99de864dff444f22b Mon Sep 17 00:00:00 2001 From: Matt Green Date: Sun, 18 Aug 2024 03:49:43 -0700 Subject: [PATCH 332/357] add getter method for LogicalPlanBuilder.plan (#12038) --- datafusion/expr/src/logical_plan/builder.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index f9769560b2512..aa28c1c192429 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -107,6 +107,11 @@ impl LogicalPlanBuilder { self.plan.schema() } + /// Return the LogicalPlan of the plan build so far + pub fn plan(&self) -> &LogicalPlan { + &self.plan + } + /// Create an empty relation. /// /// `produce_one_row` set to true means this empty node needs to produce a placeholder row. From a91be04ced3746c673788d5da124c6d30009d9ff Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sun, 18 Aug 2024 20:47:21 +0800 Subject: [PATCH 333/357] Window UDF signature check (#12045) * udwf sig Signed-off-by: jayzhan211 * add coerce_types Signed-off-by: jayzhan211 * add doc Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/expr/src/expr_schema.rs | 17 ++++- .../expr/src/type_coercion/functions.rs | 74 +++++++++++++++++-- datafusion/expr/src/udwf.rs | 30 +++++++- datafusion/sqllogictest/test_files/window.slt | 18 +++++ 4 files changed, 130 insertions(+), 9 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index af35b9a9910d7..f6489fef14a16 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -22,7 +22,7 @@ use crate::expr::{ }; use crate::type_coercion::binary::get_result_type; use crate::type_coercion::functions::{ - data_types_with_aggregate_udf, data_types_with_scalar_udf, + data_types_with_aggregate_udf, data_types_with_scalar_udf, data_types_with_window_udf, }; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; @@ -191,6 +191,21 @@ impl ExprSchemable for Expr { })?; Ok(fun.return_type(&new_types, &nullability)?) } + WindowFunctionDefinition::WindowUDF(udwf) => { + let new_types = data_types_with_window_udf(&data_types, udwf) + .map_err(|err| { + plan_datafusion_err!( + "{} {}", + err, + utils::generate_signature_error_msg( + fun.name(), + fun.signature().clone(), + &data_types + ) + ) + })?; + Ok(fun.return_type(&new_types, &nullability)?) + } _ => fun.return_type(&data_types, &nullability), } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 190374b01dd24..b0b14a1a4e6ec 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -15,22 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-use std::sync::Arc; - -use crate::{AggregateUDF, ScalarUDF, Signature, TypeSignature}; +use super::binary::{binary_numeric_coercion, comparison_coercion}; +use crate::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF}; use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; use datafusion_common::{ - exec_err, internal_datafusion_err, internal_err, plan_err, Result, + exec_err, internal_datafusion_err, internal_err, plan_err, + utils::{coerced_fixed_size_list_to_list, list_ndims}, + Result, }; use datafusion_expr_common::signature::{ ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD, }; - -use super::binary::{binary_numeric_coercion, comparison_coercion}; +use std::sync::Arc; /// Performs type coercion for scalar function arguments. /// @@ -66,6 +65,13 @@ pub fn data_types_with_scalar_udf( try_coerce_types(valid_types, current_types, &signature.type_signature) } +/// Performs type coercion for aggregate function arguments. +/// +/// Returns the data types to which each argument must be coerced to +/// match `signature`. +/// +/// For more details on coercion in general, please see the +/// [`type_coercion`](crate::type_coercion) module. pub fn data_types_with_aggregate_udf( current_types: &[DataType], func: &AggregateUDF, @@ -95,6 +101,39 @@ pub fn data_types_with_aggregate_udf( try_coerce_types(valid_types, current_types, &signature.type_signature) } +/// Performs type coercion for window function arguments. +/// +/// Returns the data types to which each argument must be coerced to +/// match `signature`. +/// +/// For more details on coercion in general, please see the +/// [`type_coercion`](crate::type_coercion) module. +pub fn data_types_with_window_udf( + current_types: &[DataType], + func: &WindowUDF, +) -> Result> { + let signature = func.signature(); + + if current_types.is_empty() { + if signature.type_signature.supports_zero_argument() { + return Ok(vec![]); + } else { + return plan_err!("{} does not support zero arguments.", func.name()); + } + } + + let valid_types = + get_valid_types_with_window_udf(&signature.type_signature, current_types, func)?; + if valid_types + .iter() + .any(|data_type| data_type == current_types) + { + return Ok(current_types.to_vec()); + } + + try_coerce_types(valid_types, current_types, &signature.type_signature) +} + /// Performs type coercion for function arguments. /// /// Returns the data types to which each argument must be coerced to @@ -205,6 +244,27 @@ fn get_valid_types_with_aggregate_udf( Ok(valid_types) } +fn get_valid_types_with_window_udf( + signature: &TypeSignature, + current_types: &[DataType], + func: &WindowUDF, +) -> Result>> { + let valid_types = match signature { + TypeSignature::UserDefined => match func.coerce_types(current_types) { + Ok(coerced_types) => vec![coerced_types], + Err(e) => return exec_err!("User-defined coercion failed with {:?}", e), + }, + TypeSignature::OneOf(signatures) => signatures + .iter() + .filter_map(|t| get_valid_types_with_window_udf(t, current_types, func).ok()) + .flatten() + .collect::>(), + _ => get_valid_types(signature, current_types)?, + }; + + Ok(valid_types) +} + /// Returns a Vec of all possible valid argument types for the given signature. 
fn get_valid_types( signature: &TypeSignature, diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index aa754a57086f6..88b3d613cb435 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -27,7 +27,7 @@ use std::{ use arrow::datatypes::DataType; -use datafusion_common::Result; +use datafusion_common::{not_impl_err, Result}; use crate::expr::WindowFunction; use crate::{ @@ -192,6 +192,11 @@ impl WindowUDF { pub fn sort_options(&self) -> Option { self.inner.sort_options() } + + /// See [`WindowUDFImpl::coerce_types`] for more details. + pub fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) + } } impl From for WindowUDF @@ -353,6 +358,29 @@ pub trait WindowUDFImpl: Debug + Send + Sync { fn sort_options(&self) -> Option { None } + + /// Coerce arguments of a function call to types that the function can evaluate. + /// + /// This function is only called if [`WindowUDFImpl::signature`] returns [`crate::TypeSignature::UserDefined`]. Most + /// UDWFs should return one of the other variants of `TypeSignature` which handle common + /// cases + /// + /// See the [type coercion module](crate::type_coercion) + /// documentation for more details on type coercion + /// + /// For example, if your function requires a floating point arguments, but the user calls + /// it like `my_func(1::int)` (aka with `1` as an integer), coerce_types could return `[DataType::Float64]` + /// to ensure the argument was cast to `1::double` + /// + /// # Parameters + /// * `arg_types`: The argument types of the arguments this function with + /// + /// # Return value + /// A Vec the same length as `arg_types`. DataFusion will `CAST` the function call + /// arguments to these specific types. + fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { + not_impl_err!("Function {} does not implement coerce_types", self.name()) + } } /// WindowUDF that adds an alias to the underlying function. It is better to diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 0bf7a8a1eb1ba..ef6746730eb60 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4879,3 +4879,21 @@ SELECT lead(column2, 1.1) OVER (order by column1) FROM t; query error DataFusion error: Execution error: Expected an integer value SELECT nth_value(column2, 1.1) OVER (order by column1) FROM t; + +statement ok +drop table t; + +statement ok +create table t(a int, b int) as values (1, 2) + +query II +select a, row_number() over (order by b) as rn from t; +---- +1 1 + +# RowNumber expect 0 args. +query error +select a, row_number(a) over (order by b) as rn from t; + +statement ok +drop table t; From 574dfeb29ffe80c0f223fe7551b9ab23f7f29eaa Mon Sep 17 00:00:00 2001 From: Namgung Chan <33323415+getChan@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:48:25 +0900 Subject: [PATCH 334/357] Fix: generate_series function support string type (#12002) * fix: sqllogictest * Revert "fix: sqllogictest" This reverts commit 4957a1d24a5102408ac3897ff93b2f2e5ad477ff. * fix: sqllogictest * remove any type signature * coerce type from null to date32 * fmt * slt * Revert "coerce type from null to date32" This reverts commit bccdc2e56b415066f1cdeb4ab671894c4562b1fe. 
* replace type coerce by `coerce_types` method * fmt * fix underscored param --- datafusion/functions-nested/src/range.rs | 93 +++++++++++++------- datafusion/sqllogictest/test_files/array.slt | 9 +- 2 files changed, 67 insertions(+), 35 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 5b7315719631e..90cf8bcbd0572 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -23,13 +23,12 @@ use arrow::datatypes::{DataType, Field}; use arrow_array::types::{Date32Type, IntervalMonthDayNanoType}; use arrow_array::NullArray; use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; -use arrow_schema::DataType::{Date32, Int64, Interval, List}; +use arrow_schema::DataType::*; use arrow_schema::IntervalUnit::MonthDayNano; use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array}; use datafusion_common::{exec_err, not_impl_datafusion_err, Result}; -use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility, -}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use itertools::Itertools; use std::any::Any; use std::iter::from_fn; use std::sync::Arc; @@ -49,16 +48,7 @@ pub(super) struct Range { impl Range { pub fn new() -> Self { Self { - signature: Signature::one_of( - vec![ - TypeSignature::Exact(vec![Int64]), - TypeSignature::Exact(vec![Int64, Int64]), - TypeSignature::Exact(vec![Int64, Int64, Int64]), - TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), - TypeSignature::Any(3), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![], } } @@ -75,9 +65,34 @@ impl ScalarUDFImpl for Range { &self.signature } + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + arg_types + .iter() + .map(|arg_type| match arg_type { + Null => Ok(Null), + Int8 => Ok(Int64), + Int16 => Ok(Int64), + Int32 => Ok(Int64), + Int64 => Ok(Int64), + UInt8 => Ok(Int64), + UInt16 => Ok(Int64), + UInt32 => Ok(Int64), + UInt64 => Ok(Int64), + Timestamp(_, _) => Ok(Date32), + Date32 => Ok(Date32), + Date64 => Ok(Date32), + Utf8 => Ok(Date32), + LargeUtf8 => Ok(Date32), + Utf8View => Ok(Date32), + Interval(_) => Ok(Interval(MonthDayNano)), + _ => exec_err!("Unsupported DataType"), + }) + .try_collect() + } + fn return_type(&self, arg_types: &[DataType]) -> Result { - if arg_types.iter().any(|t| t.eq(&DataType::Null)) { - Ok(DataType::Null) + if arg_types.iter().any(|t| t.is_null()) { + Ok(Null) } else { Ok(List(Arc::new(Field::new( "item", @@ -88,7 +103,7 @@ impl ScalarUDFImpl for Range { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.iter().any(|arg| arg.data_type() == DataType::Null) { + if args.iter().any(|arg| arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } match args[0].data_type() { @@ -120,16 +135,7 @@ pub(super) struct GenSeries { impl GenSeries { pub fn new() -> Self { Self { - signature: Signature::one_of( - vec![ - TypeSignature::Exact(vec![Int64]), - TypeSignature::Exact(vec![Int64, Int64]), - TypeSignature::Exact(vec![Int64, Int64, Int64]), - TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), - TypeSignature::Any(3), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![], } } @@ -146,9 +152,34 @@ impl ScalarUDFImpl for GenSeries { &self.signature } + fn coerce_types(&self, _arg_types: &[DataType]) -> 
Result> { + _arg_types + .iter() + .map(|arg_type| match arg_type { + Null => Ok(Null), + Int8 => Ok(Int64), + Int16 => Ok(Int64), + Int32 => Ok(Int64), + Int64 => Ok(Int64), + UInt8 => Ok(Int64), + UInt16 => Ok(Int64), + UInt32 => Ok(Int64), + UInt64 => Ok(Int64), + Timestamp(_, _) => Ok(Date32), + Date32 => Ok(Date32), + Date64 => Ok(Date32), + Utf8 => Ok(Date32), + LargeUtf8 => Ok(Date32), + Utf8View => Ok(Date32), + Interval(_) => Ok(Interval(MonthDayNano)), + _ => exec_err!("Unsupported DataType"), + }) + .try_collect() + } + fn return_type(&self, arg_types: &[DataType]) -> Result { - if arg_types.iter().any(|t| t.eq(&DataType::Null)) { - Ok(DataType::Null) + if arg_types.iter().any(|t| t.is_null()) { + Ok(Null) } else { Ok(List(Arc::new(Field::new( "item", @@ -159,7 +190,7 @@ impl ScalarUDFImpl for GenSeries { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.iter().any(|arg| arg.data_type() == DataType::Null) { + if args.iter().any(|arg| arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } match args[0].data_type() { @@ -167,7 +198,7 @@ impl ScalarUDFImpl for GenSeries { Date32 => make_scalar_function(|args| gen_range_date(args, true))(args), dt => { exec_err!( - "unsupported type for range. Expected Int64 or Date32, got: {}", + "unsupported type for gen_series. Expected Int64 or Date32, got: {}", dt ) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b97ecced57e35..249241a51aeaa 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5804,7 +5804,7 @@ select generate_series(5), ---- [0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] -query error DataFusion error: Execution error: unsupported type for range. Expected Int64 or Date32, got: Timestamp\(Nanosecond, None\) +query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp, INTERVAL '1' HOUR); ## should return NULL @@ -5936,11 +5936,12 @@ select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_t # https://github.com/apache/datafusion/issues/11922 -query error +query ? select generate_series(start, '1993-03-01', INTERVAL '1 year') from date_table; ---- -DataFusion error: Internal error: could not cast value to arrow_array::array::primitive_array::PrimitiveArray. 
-This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker +[1992-01-01, 1993-01-01] +[1993-02-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] ## array_except From 02eab80cd62e02fcb68dee8b99d63aaac680a66c Mon Sep 17 00:00:00 2001 From: mertak-synnada Date: Mon, 19 Aug 2024 13:34:47 +0300 Subject: [PATCH 335/357] Do not add redundant subquery ordering into plan (#12003) * do not add redundant subquery ordering into plan * format code * add license * fix test cases with sort plan removing * fix comment * keep sorting on ordering mode test cases * protect test intentions with order + limit * protect test intentions with order + limit * Tmp * Minor changes * Minor changes * Minor changes * Implement top down recursion with delete check * Minor changes * Minor changes * initialize fetch() api for execution plan remove unnecessary limit plans when used with sort + fetch add test case for Sort and Limit with offset push down limit even if a child with no fetch appears when the child supports push down * Address reviews * Update comments * Minor changes * Make test deterministic * add supports limit push down to union exec * support limit push down with multi children cases * fix typos Co-authored-by: Mehmet Ozan Kabak * Add fetch info to the statistics * optimize tpch test plans * Enforce distribution use inexact count estimate also. * Minor changes * Minor changes * merge with apache main add pushes_global_limit_into_multiple_fetch_plans test case change limit_pushdown.rs as manual top down operator and simplify algorithm by supporting most parent node remove and other pushdown cases * format code * fix doc paths * fix doc paths * remove redundant code block * if partition count is 1 put GlobalLimitExec * fix test cases * Apply suggestions from code review * fix syntax errors * Simplify branches * remove redundant limit plans from merge --------- Co-authored-by: Mustafa Akur Co-authored-by: Mehmet Ozan Kabak --- datafusion/common/src/lib.rs | 2 +- datafusion/core/src/dataframe/mod.rs | 26 +- .../datasource/physical_plan/arrow_file.rs | 4 + .../core/src/datasource/physical_plan/avro.rs | 4 + .../core/src/datasource/physical_plan/csv.rs | 4 + .../core/src/datasource/physical_plan/json.rs | 4 + .../datasource/physical_plan/parquet/mod.rs | 4 + .../src/physical_optimizer/enforce_sorting.rs | 15 +- datafusion/core/src/physical_planner.rs | 3 - datafusion/core/tests/memory_limit/mod.rs | 21 +- .../physical_optimizer/limit_pushdown.rs | 80 ++++- datafusion/core/tests/sql/explain_analyze.rs | 28 +- datafusion/optimizer/src/push_down_limit.rs | 6 +- .../physical-optimizer/src/limit_pushdown.rs | 312 ++++++++++------- .../physical-plan/src/sorts/partial_sort.rs | 4 + .../src/sorts/sort_preserving_merge.rs | 15 + datafusion/physical-plan/src/streaming.rs | 4 + datafusion/physical-plan/src/union.rs | 4 + datafusion/sql/src/relation/mod.rs | 35 +- .../sqllogictest/test_files/aggregate.slt | 21 +- .../test_files/aggregates_topk.slt | 120 +++---- .../sqllogictest/test_files/explain.slt | 5 +- .../sqllogictest/test_files/group_by.slt | 140 ++++---- .../join_disable_repartition_joins.slt | 64 ++-- datafusion/sqllogictest/test_files/joins.slt | 20 +- datafusion/sqllogictest/test_files/limit.slt | 13 + datafusion/sqllogictest/test_files/order.slt | 97 +++--- datafusion/sqllogictest/test_files/select.slt | 30 +- .../sqllogictest/test_files/subquery_sort.slt | 149 ++++++++ datafusion/sqllogictest/test_files/topk.slt | 5 +- 
.../sqllogictest/test_files/tpch/q10.slt.part | 110 +++--- .../sqllogictest/test_files/tpch/q11.slt.part | 51 ++- .../sqllogictest/test_files/tpch/q13.slt.part | 68 ++-- .../sqllogictest/test_files/tpch/q16.slt.part | 96 +++--- .../sqllogictest/test_files/tpch/q2.slt.part | 236 +++++++------ .../sqllogictest/test_files/tpch/q3.slt.part | 94 +++--- .../sqllogictest/test_files/tpch/q9.slt.part | 146 ++++---- datafusion/sqllogictest/test_files/union.slt | 97 ++++-- datafusion/sqllogictest/test_files/window.slt | 318 ++++++++---------- 39 files changed, 1415 insertions(+), 1040 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/subquery_sort.slt diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 19af889e426a2..10541e01914ad 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -19,7 +19,6 @@ mod column; mod dfschema; -mod error; mod functional_dependencies; mod join_type; mod param_value; @@ -33,6 +32,7 @@ pub mod alias; pub mod cast; pub mod config; pub mod display; +pub mod error; pub mod file_options; pub mod format; pub mod hash_utils; diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 8920a89af11ec..42203e5fe84e3 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -3046,13 +3046,12 @@ mod tests { assert_eq!( "\ Projection: t1.c1, t2.c1, Boolean(true) AS new_column\ - \n Limit: skip=0, fetch=1\ - \n Sort: t1.c1 ASC NULLS FIRST, fetch=1\ - \n Inner Join: t1.c1 = t2.c1\ - \n SubqueryAlias: t1\ - \n TableScan: aggregate_test_100 projection=[c1]\ - \n SubqueryAlias: t2\ - \n TableScan: aggregate_test_100 projection=[c1]", + \n Sort: t1.c1 ASC NULLS FIRST, fetch=1\ + \n Inner Join: t1.c1 = t2.c1\ + \n SubqueryAlias: t1\ + \n TableScan: aggregate_test_100 projection=[c1]\ + \n SubqueryAlias: t2\ + \n TableScan: aggregate_test_100 projection=[c1]", format!("{}", df_with_column.clone().into_optimized_plan()?) ); @@ -3240,13 +3239,12 @@ mod tests { assert_eq!("\ Projection: t1.c1 AS AAA, t1.c2, t1.c3, t2.c1, t2.c2, t2.c3\ - \n Limit: skip=0, fetch=1\ - \n Sort: t1.c1 ASC NULLS FIRST, t1.c2 ASC NULLS FIRST, t1.c3 ASC NULLS FIRST, t2.c1 ASC NULLS FIRST, t2.c2 ASC NULLS FIRST, t2.c3 ASC NULLS FIRST, fetch=1\ - \n Inner Join: t1.c1 = t2.c1\ - \n SubqueryAlias: t1\ - \n TableScan: aggregate_test_100 projection=[c1, c2, c3]\ - \n SubqueryAlias: t2\ - \n TableScan: aggregate_test_100 projection=[c1, c2, c3]", + \n Sort: t1.c1 ASC NULLS FIRST, t1.c2 ASC NULLS FIRST, t1.c3 ASC NULLS FIRST, t2.c1 ASC NULLS FIRST, t2.c2 ASC NULLS FIRST, t2.c3 ASC NULLS FIRST, fetch=1\ + \n Inner Join: t1.c1 = t2.c1\ + \n SubqueryAlias: t1\ + \n TableScan: aggregate_test_100 projection=[c1, c2, c3]\ + \n SubqueryAlias: t2\ + \n TableScan: aggregate_test_100 projection=[c1, c2, c3]", format!("{}", df_renamed.clone().into_optimized_plan()?) 
); diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index b4edc221c1f83..39625a55ca15e 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -197,6 +197,10 @@ impl ExecutionPlan for ArrowExec { Ok(self.projected_statistics.clone()) } + fn fetch(&self) -> Option { + self.base_config.limit + } + fn with_fetch(&self, limit: Option) -> Option> { let new_config = self.base_config.clone().with_limit(limit); diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 298d117252a1a..ce72c4087424e 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -165,6 +165,10 @@ impl ExecutionPlan for AvroExec { Some(self.metrics.clone_inner()) } + fn fetch(&self) -> Option { + self.base_config.limit + } + fn with_fetch(&self, limit: Option) -> Option> { let new_config = self.base_config.clone().with_limit(limit); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index e9f7e5797cb0b..5ab32ed36e539 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -427,6 +427,10 @@ impl ExecutionPlan for CsvExec { Some(self.metrics.clone_inner()) } + fn fetch(&self) -> Option { + self.base_config.limit + } + fn with_fetch(&self, limit: Option) -> Option> { let new_config = self.base_config.clone().with_limit(limit); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index b3f4c995ac81a..cf8f129a50369 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -207,6 +207,10 @@ impl ExecutionPlan for NdJsonExec { Some(self.metrics.clone_inner()) } + fn fetch(&self) -> Option { + self.base_config.limit + } + fn with_fetch(&self, limit: Option) -> Option> { let new_config = self.base_config.clone().with_limit(limit); diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index cb026522cfa8d..85d6f8db23736 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -745,6 +745,10 @@ impl ExecutionPlan for ParquetExec { Ok(self.projected_statistics.clone()) } + fn fetch(&self) -> Option { + self.base_config.limit + } + fn with_fetch(&self, limit: Option) -> Option> { let new_config = self.base_config.clone().with_limit(limit); diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 76df99b82c538..bda6d598b6ff3 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -62,7 +62,7 @@ use crate::physical_plan::{Distribution, ExecutionPlan, InputOrderMode}; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::{Partitioning, PhysicalSortExpr, PhysicalSortRequirement}; -use datafusion_physical_plan::limit::LocalLimitExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::repartition::RepartitionExec; 
use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; use datafusion_physical_plan::ExecutionPlanProperties; @@ -405,7 +405,16 @@ fn analyze_immediate_sort_removal( node.children = node.children.swap_remove(0).children; if let Some(fetch) = sort_exec.fetch() { // If the sort has a fetch, we need to add a limit: - Arc::new(LocalLimitExec::new(sort_input.clone(), fetch)) + if sort_exec + .properties() + .output_partitioning() + .partition_count() + == 1 + { + Arc::new(GlobalLimitExec::new(sort_input.clone(), 0, Some(fetch))) + } else { + Arc::new(LocalLimitExec::new(sort_input.clone(), fetch)) + } } else { sort_input.clone() } @@ -1124,7 +1133,7 @@ mod tests { " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "LocalLimitExec: fetch=2", + "GlobalLimitExec: skip=0, fetch=2", " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 9cc2f253f8dad..41ab4ccc98ff7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2177,9 +2177,6 @@ mod tests { assert!(format!("{plan:?}").contains("GlobalLimitExec")); assert!(format!("{plan:?}").contains("skip: 3, fetch: Some(5)")); - // LocalLimitExec adjusts the `fetch` - assert!(format!("{plan:?}").contains("LocalLimitExec")); - assert!(format!("{plan:?}").contains("fetch: 8")); Ok(()) } diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index e6a51eae13372..dbd5592e80205 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -238,18 +238,15 @@ async fn sort_preserving_merge() { // SortPreservingMergeExec (not a Sort which would compete // with the SortPreservingMergeExec for memory) &[ - "+---------------+---------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+---------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Limit: skip=0, fetch=10 |", - "| | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", - "| | TableScan: t projection=[a, b] |", - "| physical_plan | GlobalLimitExec: skip=0, fetch=10 |", - "| | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", - "| | LocalLimitExec: fetch=10 |", - "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - "+---------------+---------------------------------------------------------------------------------------------------------------+", + "+---------------+-----------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+-----------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", + "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", + "| | |", + 
"+---------------+-----------------------------------------------------------------------------------------------------------+", ] ) .run() diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index 8f3a47c95e9d2..b051feb5750ef 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -15,14 +15,15 @@ // specific language governing permissions and limitations // under the License. -use arrow_schema::{DataType, Field, Schema, SchemaRef}; -use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; +use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; use datafusion_common::config::ConfigOptions; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; use datafusion_physical_expr::expressions::{col, lit}; use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; @@ -31,8 +32,10 @@ use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; -use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; +use datafusion_physical_plan::{get_plan_string, ExecutionPlan, ExecutionPlanProperties}; use std::sync::Arc; struct DummyStreamPartition { @@ -201,6 +204,52 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc Ok(()) } +#[test] +fn pushes_global_limit_into_multiple_fetch_plans() -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone()).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema.clone(), coalesce_batches)?; + let repartition = repartition_exec(projection)?; + let sort = sort_exec( + vec![PhysicalSortExpr { + expr: col("c1", &schema)?, + options: SortOptions::default(), + }], + repartition, + ); + let spm = sort_preserving_merge_exec(sort.output_ordering().unwrap().to_vec(), sort); + let global_limit = global_limit_exec(spm, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " SortPreservingMergeExec: [c1@0 ASC]", + " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "SortPreservingMergeExec: [c1@0 ASC], 
fetch=5", + " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + #[test] fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( ) -> datafusion_common::Result<()> { @@ -227,10 +276,9 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( let expected = [ "GlobalLimitExec: skip=0, fetch=5", " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" ]; assert_eq!(get_plan_string(&after_optimize), expected); @@ -256,7 +304,7 @@ fn merges_local_limit_with_local_limit() -> datafusion_common::Result<()> { let after_optimize = LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; - let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; + let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -375,6 +423,22 @@ fn local_limit_exec( Arc::new(LocalLimitExec::new(input, fetch)) } +fn sort_exec( + sort_exprs: impl IntoIterator, + input: Arc, +) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortExec::new(sort_exprs, input)) +} + +fn sort_preserving_merge_exec( + sort_exprs: impl IntoIterator, + input: Arc, +) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) +} + fn projection_exec( schema: SchemaRef, input: Arc, diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 4c1f5efaf9899..39fd492786bc7 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -72,11 +72,6 @@ async fn explain_analyze_baseline_metrics() { assert_metrics!( &formatted, "GlobalLimitExec: skip=0, fetch=3, ", - "metrics=[output_rows=1, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "LocalLimitExec: fetch=3", "metrics=[output_rows=3, elapsed_compute=" ); assert_metrics!( @@ -612,18 +607,17 @@ async fn test_physical_plan_display_indent() { let dataframe = ctx.sql(sql).await.unwrap(); let physical_plan = dataframe.create_physical_plan().await.unwrap(); let expected = vec![ - "GlobalLimitExec: skip=0, fetch=10", - " SortPreservingMergeExec: [the_min@2 DESC], fetch=10", - " SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]", - " ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]", - " AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000", - " AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), 
min(aggregate_test_100.c12)]", - " CoalesceBatchesExec: target_batch_size=4096", - " FilterExec: c12@1 < 10", - " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", - " CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], has_header=true", + "SortPreservingMergeExec: [the_min@2 DESC], fetch=10", + " SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]", + " ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]", + " AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", + " CoalesceBatchesExec: target_batch_size=4096", + " RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000", + " AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]", + " CoalesceBatchesExec: target_batch_size=4096", + " FilterExec: c12@1 < 10", + " RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1", + " CsvExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1, c12], has_header=true", ]; let normalizer = ExplainNormalizer::new(); diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 4d8f1dbdb9558..290b893577b82 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -129,7 +129,11 @@ impl OptimizerRule for PushDownLimit { Some(sort.fetch.map(|f| f.min(sort_fetch)).unwrap_or(sort_fetch)) }; if new_fetch == sort.fetch { - original_limit(skip, fetch, LogicalPlan::Sort(sort)) + if skip > 0 { + original_limit(skip, fetch, LogicalPlan::Sort(sort)) + } else { + Ok(Transformed::yes(LogicalPlan::Sort(sort))) + } } else { sort.fetch = new_fetch; limit.input = Arc::new(LogicalPlan::Sort(sort)); diff --git a/datafusion/physical-optimizer/src/limit_pushdown.rs b/datafusion/physical-optimizer/src/limit_pushdown.rs index 2b787980585a5..7f45292f9e27c 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown.rs @@ -23,20 +23,35 @@ use std::sync::Arc; use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; -use datafusion_common::plan_datafusion_err; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::error::Result; +use datafusion_common::tree_node::{Transformed, TreeNodeRecursion}; use datafusion_common::utils::combine_limit; -use datafusion_common::Result; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; /// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from /// the parent to the child if applicable. #[derive(Default)] pub struct LimitPushdown {} +/// This is a "data class" we use within the [`LimitPushdown`] rule to push +/// down [`LimitExec`] in the plan. GlobalRequirements are hold as a rule-wide state +/// and holds the fetch and skip information. The struct also has a field named +/// satisfied which means if the "current" plan is valid in terms of limits or not. 
+/// +/// For example: If the plan is satisfied with current fetch info, we decide to not add a LocalLimit +/// +/// [`LimitPushdown`]: crate::limit_pushdown::LimitPushdown +/// [`LimitExec`]: crate::limit_pushdown::LimitExec +#[derive(Default, Clone, Debug)] +pub struct GlobalRequirements { + fetch: Option, + skip: usize, + satisfied: bool, +} + impl LimitPushdown { #[allow(missing_docs)] pub fn new() -> Self { @@ -50,7 +65,12 @@ impl PhysicalOptimizerRule for LimitPushdown { plan: Arc, _config: &ConfigOptions, ) -> Result> { - plan.transform_down(push_down_limits).data() + let global_state = GlobalRequirements { + fetch: None, + skip: 0, + satisfied: false, + }; + pushdown_limits(plan, global_state) } fn name(&self) -> &str { @@ -65,7 +85,7 @@ impl PhysicalOptimizerRule for LimitPushdown { /// This enumeration makes `skip` and `fetch` calculations easier by providing /// a single API for both local and global limit operators. #[derive(Debug)] -enum LimitExec { +pub enum LimitExec { Global(GlobalLimitExec), Local(LocalLimitExec), } @@ -91,15 +111,6 @@ impl LimitExec { Self::Local(_) => 0, } } - - fn with_child(&self, child: Arc) -> Self { - match self { - Self::Global(global) => { - Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) - } - Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), - } - } } impl From for Arc { @@ -111,26 +122,156 @@ impl From for Arc { } } -/// Pushes down the limit through the plan. -pub fn push_down_limits( - plan: Arc, -) -> Result>> { - let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { - let child = limit_exec.input(); - if let Some(child_limit) = extract_limit(child) { - let merged = merge_limits(&limit_exec, &child_limit); - // Revisit current node in case of consecutive pushdowns - Some(push_down_limits(merged)?.data) - } else if child.supports_limit_pushdown() { - try_push_down_limit(&limit_exec, Arc::clone(child))? +/// This function is the main helper function of the `LimitPushDown` rule. +/// The helper takes an `ExecutionPlan` and a global (algorithm) state which is +/// an instance of `GlobalRequirements` and modifies these parameters while +/// checking if the limits can be pushed down or not. +pub fn pushdown_limit_helper( + mut pushdown_plan: Arc, + mut global_state: GlobalRequirements, +) -> Result<(Transformed>, GlobalRequirements)> { + if let Some(limit_exec) = extract_limit(&pushdown_plan) { + // If we have fetch/skip info in the global state already, we need to + // decide which one to continue with: + let (skip, fetch) = combine_limit( + global_state.skip, + global_state.fetch, + limit_exec.skip(), + limit_exec.fetch(), + ); + global_state.skip = skip; + global_state.fetch = fetch; + + // Now the global state has the most recent information, we can remove + // the `LimitExec` plan. We will decide later if we should add it again + // or not. 
+ return Ok(( + Transformed { + data: Arc::clone(limit_exec.input()), + transformed: true, + tnr: TreeNodeRecursion::Stop, + }, + global_state, + )); + } + + // If we have a non-limit operator with fetch capability, update global + // state as necessary: + if pushdown_plan.fetch().is_some() { + if global_state.fetch.is_none() { + global_state.satisfied = true; + } + (global_state.skip, global_state.fetch) = combine_limit( + global_state.skip, + global_state.fetch, + 0, + pushdown_plan.fetch(), + ); + } + + let Some(global_fetch) = global_state.fetch else { + // There's no valid fetch information, exit early: + return if global_state.skip > 0 && !global_state.satisfied { + // There might be a case with only offset, if so add a global limit: + global_state.satisfied = true; + Ok(( + Transformed::yes(add_global_limit( + pushdown_plan, + global_state.skip, + None, + )), + global_state, + )) } else { - add_fetch_to_child(&limit_exec, Arc::clone(child)) + // There's no info on offset or fetch, nothing to do: + Ok((Transformed::no(pushdown_plan), global_state)) + }; + }; + + let skip_and_fetch = Some(global_fetch + global_state.skip); + + if pushdown_plan.supports_limit_pushdown() { + if !combines_input_partitions(&pushdown_plan) { + // We have information in the global state and the plan pushes down, + // continue: + Ok((Transformed::no(pushdown_plan), global_state)) + } else if let Some(plan_with_fetch) = pushdown_plan.with_fetch(skip_and_fetch) { + // This plan is combining input partitions, so we need to add the + // fetch info to plan if possible. If not, we must add a `LimitExec` + // with the information from the global state. + global_state.fetch = skip_and_fetch; + global_state.skip = 0; + global_state.satisfied = true; + Ok((Transformed::yes(plan_with_fetch), global_state)) + } else if global_state.satisfied { + // If the plan is already satisfied, do not add a limit: + Ok((Transformed::no(pushdown_plan), global_state)) + } else { + global_state.satisfied = true; + Ok(( + Transformed::yes(add_limit( + pushdown_plan, + global_state.skip, + global_fetch, + )), + global_state, + )) } } else { - None - }; + // The plan does not support push down and it is not a limit. We will need + // to add a limit or a fetch. If the plan is already satisfied, we will try + // to add the fetch info and return the plan. - Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) + // There's no push down, change fetch & skip to default values: + let global_skip = global_state.skip; + global_state.fetch = None; + global_state.skip = 0; + + let maybe_fetchable = pushdown_plan.with_fetch(skip_and_fetch); + if global_state.satisfied { + if let Some(plan_with_fetch) = maybe_fetchable { + Ok((Transformed::yes(plan_with_fetch), global_state)) + } else { + Ok((Transformed::no(pushdown_plan), global_state)) + } + } else { + // Add fetch or a `LimitExec`: + global_state.satisfied = true; + pushdown_plan = if let Some(plan_with_fetch) = maybe_fetchable { + if global_skip > 0 { + add_global_limit(plan_with_fetch, global_skip, Some(global_fetch)) + } else { + plan_with_fetch + } + } else { + add_limit(pushdown_plan, global_skip, global_fetch) + }; + Ok((Transformed::yes(pushdown_plan), global_state)) + } + } +} + +/// Pushes down the limit through the plan. 
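+///
+/// Starting from the root, it re-runs [`pushdown_limit_helper`] as long as the
+/// helper keeps absorbing `LimitExec`s into the skip/fetch state (signalled via
+/// `TreeNodeRecursion::Stop`), then recurses into the children of the resulting
+/// node, handing each child its own clone of that accumulated state.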
+pub(crate) fn pushdown_limits( + pushdown_plan: Arc, + global_state: GlobalRequirements, +) -> Result> { + let (mut new_node, mut global_state) = + pushdown_limit_helper(pushdown_plan, global_state)?; + + while new_node.tnr == TreeNodeRecursion::Stop { + (new_node, global_state) = pushdown_limit_helper(new_node.data, global_state)?; + } + + let children = new_node.data.children(); + let new_children = children + .into_iter() + .map(|child| { + pushdown_limits(Arc::::clone(child), global_state.clone()) + }) + .collect::>()?; + + new_node.data.with_new_children(new_children) } /// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a @@ -154,100 +295,33 @@ fn extract_limit(plan: &Arc) -> Option { } } -/// Merge the limits of the parent and the child. If at least one of them is a -/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, -/// the result is a [`LocalLimitExec`]. -fn merge_limits( - parent_limit_exec: &LimitExec, - child_limit_exec: &LimitExec, -) -> Arc { - // We can use the logic in `combine_limit` from the logical optimizer: - let (skip, fetch) = combine_limit( - parent_limit_exec.skip(), - parent_limit_exec.fetch(), - child_limit_exec.skip(), - child_limit_exec.fetch(), - ); - match (parent_limit_exec, child_limit_exec) { - (LimitExec::Local(_), LimitExec::Local(_)) => { - // The fetch is present in this case, can unwrap. - Arc::new(LocalLimitExec::new( - Arc::clone(child_limit_exec.input()), - fetch.unwrap(), - )) - } - _ => Arc::new(GlobalLimitExec::new( - Arc::clone(child_limit_exec.input()), - skip, - fetch, - )), - } +/// Checks if the given plan combines input partitions. +fn combines_input_partitions(plan: &Arc) -> bool { + let plan = plan.as_any(); + plan.is::() || plan.is::() } -/// Pushes down the limit through the child. If the child has a single input -/// partition, simply swaps the parent and the child. Otherwise, adds a -/// [`LocalLimitExec`] after in between in addition to swapping, because of -/// multiple input partitions. -fn try_push_down_limit( - limit_exec: &LimitExec, - child: Arc, -) -> Result>> { - let grandchildren = child.children(); - if let Some(&grandchild) = grandchildren.first() { - // GlobalLimitExec and LocalLimitExec must have an input after pushdown - if combines_input_partitions(&child) { - // We still need a LocalLimitExec after the child - if let Some(fetch) = limit_exec.fetch() { - let new_local_limit = Arc::new(LocalLimitExec::new( - Arc::clone(grandchild), - fetch + limit_exec.skip(), - )); - let new_child = - Arc::clone(&child).with_new_children(vec![new_local_limit])?; - Ok(Some(limit_exec.with_child(new_child).into())) - } else { - Ok(None) - } - } else { - // Swap current with child - let new_limit = limit_exec.with_child(Arc::clone(grandchild)); - let new_child = child.with_new_children(vec![new_limit.into()])?; - Ok(Some(new_child)) - } +/// Adds a limit to the plan, chooses between global and local limits based on +/// skip value and the number of partitions. +fn add_limit( + pushdown_plan: Arc, + skip: usize, + fetch: usize, +) -> Arc { + if skip > 0 || pushdown_plan.output_partitioning().partition_count() == 1 { + add_global_limit(pushdown_plan, skip, Some(fetch)) } else { - // Operators supporting limit push down must have a child. 
- Err(plan_datafusion_err!( - "{:#?} must have a child to push down limit", - child - )) + Arc::new(LocalLimitExec::new(pushdown_plan, fetch + skip)) as _ } } -fn combines_input_partitions(exec: &Arc) -> bool { - let exec = exec.as_any(); - exec.is::() || exec.is::() -} - -/// Transforms child to the fetching version if supported. Removes the parent if -/// skip is zero. Otherwise, keeps the parent. -fn add_fetch_to_child( - limit_exec: &LimitExec, - child: Arc, -) -> Option> { - let fetch = limit_exec.fetch(); - let skip = limit_exec.skip(); - - let child_fetch = fetch.map(|f| f + skip); - - if let Some(child_with_fetch) = child.with_fetch(child_fetch) { - if skip > 0 { - Some(limit_exec.with_child(child_with_fetch).into()) - } else { - Some(child_with_fetch) - } - } else { - None - } +/// Adds a global limit to the plan. +fn add_global_limit( + pushdown_plan: Arc, + skip: usize, + fetch: Option, +) -> Arc { + Arc::new(GlobalLimitExec::new(pushdown_plan, skip, fetch)) as _ } // See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index fe6b744935fb3..70a63e71ad2f2 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -238,6 +238,10 @@ impl ExecutionPlan for PartialSortExec { &self.cache } + fn fetch(&self) -> Option { + self.fetch + } + fn required_input_distribution(&self) -> Vec { if self.preserve_partitioning { vec![Distribution::UnspecifiedDistribution] diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 0fedfb6296e75..7ba1d77aea4e6 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -163,6 +163,21 @@ impl ExecutionPlan for SortPreservingMergeExec { &self.cache } + fn fetch(&self) -> Option { + self.fetch + } + + /// Sets the number of rows to fetch + fn with_fetch(&self, limit: Option) -> Option> { + Some(Arc::new(Self { + input: Arc::clone(&self.input), + expr: self.expr.clone(), + metrics: self.metrics.clone(), + fetch: limit, + cache: self.cache.clone(), + })) + } + fn required_input_distribution(&self) -> Vec { vec![Distribution::UnspecifiedDistribution] } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index f3cca4bfbe174..9dc8b214420b8 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -217,6 +217,10 @@ impl ExecutionPlan for StreamingTableExec { &self.cache } + fn fetch(&self) -> Option { + self.limit + } + fn children(&self) -> Vec<&Arc> { vec![] } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 9ef29c833dccb..78b25686054d8 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -260,6 +260,10 @@ impl ExecutionPlan for UnionExec { fn benefits_from_input_partitioning(&self) -> Vec { vec![false; self.children().len()] } + + fn supports_limit_pushdown(&self) -> bool { + true + } } /// Combines multiple input streams by interleaving them. diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 50027a0efd845..c5fe180c23025 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -16,6 +16,8 @@ // under the License. 
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; + +use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{not_impl_err, plan_err, DFSchema, Result, TableReference}; use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{FunctionArg, FunctionArgExpr, TableFactor}; @@ -143,10 +145,39 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ); } }; + + let optimized_plan = optimize_subquery_sort(plan)?.data; if let Some(alias) = alias { - self.apply_table_alias(plan, alias) + self.apply_table_alias(optimized_plan, alias) } else { - Ok(plan) + Ok(optimized_plan) } } } + +fn optimize_subquery_sort(plan: LogicalPlan) -> Result> { + // When initializing subqueries, we examine sort options since they might be unnecessary. + // They are only important if the subquery result is affected by the ORDER BY statement, + // which can happen when we have: + // 1. DISTINCT ON / ARRAY_AGG ... => Handled by an `Aggregate` and its requirements. + // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements. + // 3. LIMIT => Handled by a `Sort`, so we need to search for it. + let mut has_limit = false; + let new_plan = plan.clone().transform_down(|c| { + if let LogicalPlan::Limit(_) = c { + has_limit = true; + return Ok(Transformed::no(c)); + } + match c { + LogicalPlan::Sort(s) => { + if !has_limit { + has_limit = false; + return Ok(Transformed::yes(s.input.as_ref().clone())); + } + Ok(Transformed::no(LogicalPlan::Sort(s))) + } + _ => Ok(Transformed::no(c)), + } + }); + new_plan +} diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 0cda24d6ff5e4..ce4d16bb69383 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -4625,17 +4625,16 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=0, fetch=5 02)--CoalescePartitionsExec -03)----LocalLimitExec: fetch=5 -04)------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([c3@0, min(aggregate_test_100.c1)@1], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] -08)--------------AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3], aggr=[min(aggregate_test_100.c1)] -09)----------------CoalesceBatchesExec: target_batch_size=8192 -10)------------------RepartitionExec: partitioning=Hash([c3@0], 4), input_partitions=4 -11)--------------------AggregateExec: mode=Partial, gby=[c3@1 as c3], aggr=[min(aggregate_test_100.c1)] -12)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true +03)----AggregateExec: mode=FinalPartitioned, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([c3@0, min(aggregate_test_100.c1)@1], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[c3@0 as c3, min(aggregate_test_100.c1)@1 as min(aggregate_test_100.c1)], aggr=[], lim=[5] +07)------------AggregateExec: 
mode=FinalPartitioned, gby=[c3@0 as c3], aggr=[min(aggregate_test_100.c1)] +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------RepartitionExec: partitioning=Hash([c3@0], 4), input_partitions=4 +10)------------------AggregateExec: mode=Partial, gby=[c3@1 as c3], aggr=[min(aggregate_test_100.c1)] +11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true # diff --git a/datafusion/sqllogictest/test_files/aggregates_topk.slt b/datafusion/sqllogictest/test_files/aggregates_topk.slt index 8e67f501dbd76..2209edc5d1fc4 100644 --- a/datafusion/sqllogictest/test_files/aggregates_topk.slt +++ b/datafusion/sqllogictest/test_files/aggregates_topk.slt @@ -40,20 +40,18 @@ query TT explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] -04)------TableScan: traces projection=[trace_id, timestamp] +01)Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 +02)--Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] +03)----TableScan: traces projection=[trace_id, timestamp] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -09)----------------MemoryExec: partitions=1, partition_sizes=[1] +01)SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +08)--------------MemoryExec: partitions=1, partition_sizes=[1] query TI @@ -95,77 +93,69 @@ query TT explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] -04)------TableScan: traces projection=[trace_id, timestamp] +01)Sort: max(traces.timestamp) DESC NULLS FIRST, fetch=4 +02)--Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] +03)----TableScan: traces projection=[trace_id, timestamp] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: 
TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] -09)----------------MemoryExec: partitions=1, partition_sizes=[1] +01)SortPreservingMergeExec: [max(traces.timestamp)@1 DESC], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 DESC], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)], lim=[4] +08)--------------MemoryExec: partitions=1, partition_sizes=[1] query TT explain select trace_id, MIN(timestamp) from traces group by trace_id order by MIN(timestamp) desc limit 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: min(traces.timestamp) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[min(traces.timestamp)]] -04)------TableScan: traces projection=[trace_id, timestamp] +01)Sort: min(traces.timestamp) DESC NULLS FIRST, fetch=4 +02)--Aggregate: groupBy=[[traces.trace_id]], aggr=[[min(traces.timestamp)]] +03)----TableScan: traces projection=[trace_id, timestamp] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [min(traces.timestamp)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[min(traces.timestamp)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] -09)----------------MemoryExec: partitions=1, partition_sizes=[1] +01)SortPreservingMergeExec: [min(traces.timestamp)@1 DESC], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[min(traces.timestamp)@1 DESC], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[min(traces.timestamp)] +08)--------------MemoryExec: partitions=1, partition_sizes=[1] query TT explain select trace_id, MAX(timestamp) from traces group by trace_id order by MAX(timestamp) asc limit 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: max(traces.timestamp) ASC NULLS LAST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] -04)------TableScan: traces 
projection=[trace_id, timestamp] +01)Sort: max(traces.timestamp) ASC NULLS LAST, fetch=4 +02)--Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] +03)----TableScan: traces projection=[trace_id, timestamp] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [max(traces.timestamp)@1 ASC NULLS LAST], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 ASC NULLS LAST], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -09)----------------MemoryExec: partitions=1, partition_sizes=[1] +01)SortPreservingMergeExec: [max(traces.timestamp)@1 ASC NULLS LAST], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[max(traces.timestamp)@1 ASC NULLS LAST], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +08)--------------MemoryExec: partitions=1, partition_sizes=[1] query TT explain select trace_id, MAX(timestamp) from traces group by trace_id order by trace_id asc limit 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: traces.trace_id ASC NULLS LAST, fetch=4 -03)----Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] -04)------TableScan: traces projection=[trace_id, timestamp] +01)Sort: traces.trace_id ASC NULLS LAST, fetch=4 +02)--Aggregate: groupBy=[[traces.trace_id]], aggr=[[max(traces.timestamp)]] +03)----TableScan: traces projection=[trace_id, timestamp] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [trace_id@0 ASC NULLS LAST], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[trace_id@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 -07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] -09)----------------MemoryExec: partitions=1, partition_sizes=[1] +01)SortPreservingMergeExec: [trace_id@0 ASC NULLS LAST], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[trace_id@0 ASC NULLS LAST], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4 +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[max(traces.timestamp)] +08)--------------MemoryExec: partitions=1, 
partition_sizes=[1] query TI select trace_id, max(timestamp) from traces group by trace_id order by MAX(timestamp) desc limit 4; diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 1e8850efadff9..94b915cebe14f 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -76,9 +76,8 @@ query TT explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10 ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10 -03)----TableScan: aggregate_test_100_with_order projection=[c1] +01)Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10 +02)--TableScan: aggregate_test_100_with_order projection=[c1] physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv]]}, projection=[c1], limit=10, output_ordering=[c1@0 ASC NULLS LAST], has_header=true ## explain_physical_plan_only diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index efcc0e5b0f58f..73bfd9844609b 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2260,13 +2260,11 @@ ORDER BY a, b, d LIMIT 50; ---- logical_plan -01)Limit: skip=0, fetch=50 -02)--Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST, fetch=50 -03)----TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] +01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST, fetch=50 +02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)GlobalLimitExec: skip=0, fetch=50 -02)--PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] -03)----StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] +01)PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] +02)--StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] query TT EXPLAIN SELECT * @@ -2524,21 +2522,23 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, SUM(s.amount) AS sum1 FROM (SELECT * FROM sales_global - ORDER BY country) AS s + ORDER BY country + LIMIT 10) AS s GROUP BY s.country ---- logical_plan 01)Projection: s.country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s -04)------Sort: sales_global.country ASC NULLS LAST +04)------Sort: sales_global.country ASC NULLS LAST, fetch=10 05)--------TableScan: sales_global projection=[country, amount] physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], 
ordering_mode=Sorted -03)----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] + query T?R rowsort SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, SUM(s.amount) AS sum1 @@ -2560,19 +2560,20 @@ EXPLAIN SELECT s.country, s.zip_code, ARRAY_AGG(s.amount ORDER BY s.amount DESC) SUM(s.amount) AS sum1 FROM (SELECT * FROM sales_global - ORDER BY country) AS s + ORDER BY country + LIMIT 10) AS s GROUP BY s.country, s.zip_code ---- logical_plan 01)Projection: s.country, s.zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 02)--Aggregate: groupBy=[[s.country, s.zip_code]], aggr=[[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s -04)------Sort: sales_global.country ASC NULLS LAST +04)------Sort: sales_global.country ASC NULLS LAST, fetch=10 05)--------TableScan: sales_global projection=[zip_code, country, amount] physical_plan 01)ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, sum(s.amount)@3 as sum1] 02)--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=PartiallySorted([0]) -03)----SortExec: expr=[country@1 ASC NULLS LAST,amount@2 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@1 ASC NULLS LAST,amount@2 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] query TI?R rowsort @@ -2596,19 +2597,20 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.country DESC) AS amounts SUM(s.amount) AS sum1 FROM (SELECT * FROM sales_global - ORDER BY country) AS s + ORDER BY country + LIMIT 10) AS s GROUP BY s.country ---- logical_plan 01)Projection: s.country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 02)--Aggregate: groupBy=[[s.country]], aggr=[[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s -04)------Sort: sales_global.country ASC NULLS LAST +04)------Sort: sales_global.country ASC NULLS LAST, fetch=10 05)--------TableScan: sales_global projection=[country, amount] physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: expr=[country@0 ASC NULLS LAST], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] query T?R rowsort @@ -2631,21 +2633,23 @@ EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.country DESC, s.amount D SUM(s.amount) AS sum1 FROM (SELECT * FROM sales_global - ORDER BY country) AS s + ORDER BY country + LIMIT 10) AS s GROUP BY s.country ---- logical_plan 01)Projection: s.country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST] AS amounts, sum(s.amount) AS sum1 02)--Aggregate: groupBy=[[s.country]], 
aggr=[[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(CAST(s.amount AS Float64))]] 03)----SubqueryAlias: s -04)------Sort: sales_global.country ASC NULLS LAST +04)------Sort: sales_global.country ASC NULLS LAST, fetch=10 05)--------TableScan: sales_global projection=[country, amount] physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] + query T?R rowsort SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.country DESC, s.amount DESC) AS amounts, SUM(s.amount) AS sum1 @@ -2798,8 +2802,7 @@ EXPLAIN SELECT country, FIRST_VALUE(amount ORDER BY ts DESC) as fv1, logical_plan 01)Projection: sales_global.country, first_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS fv1, last_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS lv1, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST] AS sum1 02)--Aggregate: groupBy=[[sales_global.country]], aggr=[[first_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], sum(CAST(sales_global.amount AS Float64)) ORDER BY [sales_global.ts DESC NULLS FIRST]]] -03)----Sort: sales_global.ts ASC NULLS LAST -04)------TableScan: sales_global projection=[country, ts, amount] +03)----TableScan: sales_global projection=[country, ts, amount] physical_plan 01)ProjectionExec: expr=[country@0 as country, first_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@1 as fv1, last_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@2 as lv1, sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]@3 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[first_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], last_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST], sum(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] @@ -2959,6 +2962,7 @@ physical_plan 08)--------------AggregateExec: mode=Partial, gby=[country@0 as country], aggr=[first_value(sales_global.amount) ORDER BY [sales_global.ts ASC NULLS LAST], last_value(sales_global.amount) ORDER BY [sales_global.ts DESC NULLS FIRST]] 09)----------------MemoryExec: partitions=1, partition_sizes=[1] + query TRR SELECT country, FIRST_VALUE(amount ORDER BY ts ASC) AS fv1, LAST_VALUE(amount ORDER BY ts DESC) AS fv2 @@ -4220,22 +4224,19 @@ EXPLAIN SELECT date_bin('15 minutes', ts) as time_chunks LIMIT 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: time_chunks DESC NULLS FIRST, fetch=5 -03)----Projection: date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts) AS time_chunks -04)------Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps.ts) AS date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)]], aggr=[[]] 
-05)--------TableScan: unbounded_csv_with_timestamps projection=[ts] +01)Sort: time_chunks DESC NULLS FIRST, fetch=5 +02)--Projection: date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts) AS time_chunks +03)----Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps.ts) AS date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)]], aggr=[[]] +04)------TableScan: unbounded_csv_with_timestamps projection=[ts] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [time_chunks@0 DESC], fetch=5 -03)----ProjectionExec: expr=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as time_chunks] -04)------LocalLimitExec: fetch=5 -05)--------AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted -06)----------CoalesceBatchesExec: target_batch_size=2 -07)------------RepartitionExec: partitioning=Hash([date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0], 8), input_partitions=8, preserve_order=true, sort_exprs=date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 DESC -08)--------------AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@0) as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted -09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -10)------------------StreamingTableExec: partition_sizes=1, projection=[ts], infinite_source=true, output_ordering=[ts@0 DESC] +01)SortPreservingMergeExec: [time_chunks@0 DESC], fetch=5 +02)--ProjectionExec: expr=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as time_chunks] +03)----AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted +04)------CoalesceBatchesExec: target_batch_size=2 +05)--------RepartitionExec: partitioning=Hash([date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0], 8), input_partitions=8, preserve_order=true, sort_exprs=date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)@0 DESC +06)----------AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@0) as date_bin(Utf8("15 minutes"),unbounded_csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted +07)------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +08)--------------StreamingTableExec: partition_sizes=1, projection=[ts], infinite_source=true, output_ordering=[ts@0 DESC] query P SELECT date_bin('15 minutes', ts) as time_chunks @@ -4277,22 +4278,20 @@ EXPLAIN SELECT extract(month from ts) as months LIMIT 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: months DESC NULLS FIRST, fetch=5 -03)----Projection: date_part(Utf8("MONTH"),csv_with_timestamps.ts) AS months -04)------Aggregate: groupBy=[[date_part(Utf8("MONTH"), csv_with_timestamps.ts)]], aggr=[[]] -05)--------TableScan: csv_with_timestamps projection=[ts] +01)Sort: months DESC NULLS FIRST, fetch=5 +02)--Projection: date_part(Utf8("MONTH"),csv_with_timestamps.ts) AS months +03)----Aggregate: groupBy=[[date_part(Utf8("MONTH"), csv_with_timestamps.ts)]], aggr=[[]] +04)------TableScan: 
csv_with_timestamps projection=[ts] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [months@0 DESC], fetch=5 -03)----SortExec: TopK(fetch=5), expr=[months@0 DESC], preserve_partitioning=[true] -04)------ProjectionExec: expr=[date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0 as months] -05)--------AggregateExec: mode=FinalPartitioned, gby=[date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0 as date_part(Utf8("MONTH"),csv_with_timestamps.ts)], aggr=[] -06)----------CoalesceBatchesExec: target_batch_size=2 -07)------------RepartitionExec: partitioning=Hash([date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0], 8), input_partitions=8 -08)--------------AggregateExec: mode=Partial, gby=[date_part(MONTH, ts@0) as date_part(Utf8("MONTH"),csv_with_timestamps.ts)], aggr=[] -09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], output_ordering=[ts@0 DESC], has_header=false +01)SortPreservingMergeExec: [months@0 DESC], fetch=5 +02)--SortExec: TopK(fetch=5), expr=[months@0 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0 as months] +04)------AggregateExec: mode=FinalPartitioned, gby=[date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0 as date_part(Utf8("MONTH"),csv_with_timestamps.ts)], aggr=[] +05)--------CoalesceBatchesExec: target_batch_size=2 +06)----------RepartitionExec: partitioning=Hash([date_part(Utf8("MONTH"),csv_with_timestamps.ts)@0], 8), input_partitions=8 +07)------------AggregateExec: mode=Partial, gby=[date_part(MONTH, ts@0) as date_part(Utf8("MONTH"),csv_with_timestamps.ts)], aggr=[] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], output_ordering=[ts@0 DESC], has_header=false query R SELECT extract(month from ts) as months @@ -4325,17 +4324,14 @@ EXPLAIN SELECT name, date_bin('15 minutes', ts) as time_chunks LIMIT 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: unbounded_csv_with_timestamps2.name DESC NULLS FIRST, time_chunks DESC NULLS FIRST, fetch=5 -03)----Projection: unbounded_csv_with_timestamps2.name, date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps2.ts) AS time_chunks -04)------TableScan: unbounded_csv_with_timestamps2 projection=[name, ts] +01)Sort: unbounded_csv_with_timestamps2.name DESC NULLS FIRST, time_chunks DESC NULLS FIRST, fetch=5 +02)--Projection: unbounded_csv_with_timestamps2.name, date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps2.ts) AS time_chunks +03)----TableScan: unbounded_csv_with_timestamps2 projection=[name, ts] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [name@0 DESC,time_chunks@1 DESC], fetch=5 -03)----ProjectionExec: expr=[name@0 as name, date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@1) as time_chunks] -04)------LocalLimitExec: fetch=5 -05)--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -06)----------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] 
+01)SortPreservingMergeExec: [name@0 DESC,time_chunks@1 DESC], fetch=5 +02)--ProjectionExec: expr=[name@0 as name, date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@1) as time_chunks] +03)----RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +04)------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] statement ok drop table t1 @@ -4573,20 +4569,18 @@ ORDER BY MAX(t1) DESC LIMIT 4; ---- logical_plan -01)Limit: skip=0, fetch=4 -02)--Sort: max(timestamp_table.t1) DESC NULLS FIRST, fetch=4 -03)----Aggregate: groupBy=[[timestamp_table.c2]], aggr=[[max(timestamp_table.t1)]] -04)------TableScan: timestamp_table projection=[t1, c2] +01)Sort: max(timestamp_table.t1) DESC NULLS FIRST, fetch=4 +02)--Aggregate: groupBy=[[timestamp_table.c2]], aggr=[[max(timestamp_table.t1)]] +03)----TableScan: timestamp_table projection=[t1, c2] physical_plan -01)GlobalLimitExec: skip=0, fetch=4 -02)--SortPreservingMergeExec: [max(timestamp_table.t1)@1 DESC], fetch=4 -03)----SortExec: TopK(fetch=4), expr=[max(timestamp_table.t1)@1 DESC], preserve_partitioning=[true] -04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[max(timestamp_table.t1)], lim=[4] -05)--------CoalesceBatchesExec: target_batch_size=2 -06)----------RepartitionExec: partitioning=Hash([c2@0], 8), input_partitions=8 -07)------------AggregateExec: mode=Partial, gby=[c2@1 as c2], aggr=[max(timestamp_table.t1)], lim=[4] -08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=4 -09)----------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/0.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/2.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/3.csv]]}, projection=[t1, c2], has_header=true +01)SortPreservingMergeExec: [max(timestamp_table.t1)@1 DESC], fetch=4 +02)--SortExec: TopK(fetch=4), expr=[max(timestamp_table.t1)@1 DESC], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[max(timestamp_table.t1)], lim=[4] +04)------CoalesceBatchesExec: target_batch_size=2 +05)--------RepartitionExec: partitioning=Hash([c2@0], 8), input_partitions=8 +06)----------AggregateExec: mode=Partial, gby=[c2@1 as c2], aggr=[max(timestamp_table.t1)], lim=[4] +07)------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=4 +08)--------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/0.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/2.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/3.csv]]}, projection=[t1, c2], has_header=true # Clean up statement ok diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 97130201fca80..c56c59b1bd786 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -46,22 +46,20 @@ EXPLAIN SELECT t2.a LIMIT 5 ---- 
logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: t2.a ASC NULLS LAST, fetch=5 -03)----Projection: t2.a -04)------Inner Join: t1.c = t2.c -05)--------SubqueryAlias: t1 -06)----------TableScan: annotated_data projection=[c] -07)--------SubqueryAlias: t2 -08)----------TableScan: annotated_data projection=[a, c] +01)Sort: t2.a ASC NULLS LAST, fetch=5 +02)--Projection: t2.a +03)----Inner Join: t1.c = t2.c +04)------SubqueryAlias: t1 +05)--------TableScan: annotated_data projection=[c] +06)------SubqueryAlias: t2 +07)--------TableScan: annotated_data projection=[a, c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5 -03)----CoalesceBatchesExec: target_batch_size=8192, fetch=5 -04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1] -05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true -06)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5 +02)--CoalesceBatchesExec: target_batch_size=8192, fetch=5 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1] +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true +05)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true # preserve_inner_join query IIII nosort @@ -87,26 +85,24 @@ EXPLAIN SELECT t2.a as a2, t2.b LIMIT 10 ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: a2 ASC NULLS LAST, t2.b ASC NULLS LAST, fetch=10 -03)----Projection: t2.a AS a2, t2.b -04)------RightSemi Join: t1.d = t2.d, t1.c = t2.c -05)--------SubqueryAlias: t1 -06)----------TableScan: annotated_data projection=[c, d] -07)--------SubqueryAlias: t2 -08)----------Filter: annotated_data.d = Int32(3) -09)------------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] +01)Sort: a2 ASC NULLS LAST, t2.b ASC NULLS LAST, fetch=10 +02)--Projection: t2.a AS a2, t2.b +03)----RightSemi Join: t1.d = t2.d, t1.c = t2.c +04)------SubqueryAlias: t1 +05)--------TableScan: annotated_data projection=[c, d] +06)------SubqueryAlias: t2 +07)--------Filter: annotated_data.d = Int32(3) +08)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 -03)----ProjectionExec: expr=[a@0 as a2, b@1 as b] -04)------CoalesceBatchesExec: target_batch_size=8192, fetch=10 -05)--------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true -07)----------CoalesceBatchesExec: target_batch_size=8192 -08)------------FilterExec: d@3 = 3 -09)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -10)----------------CsvExec: file_groups={1 
group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 +02)--ProjectionExec: expr=[a@0 as a2, b@1 as b] +03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10 +04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true +06)--------CoalesceBatchesExec: target_batch_size=8192 +07)----------FilterExec: d@3 = 3 +08)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true # preserve_right_semi_join query II nosort diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index fa0d4cbc49f87..9f036c387033f 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3898,6 +3898,7 @@ SELECT * FROM ( ) as lhs RIGHT JOIN ( SELECT * from right_table_no_nulls ORDER BY b + LIMIT 10 ) AS rhs ON lhs.b=rhs.b ---- 11 1 21 1 @@ -3911,6 +3912,7 @@ EXPLAIN SELECT * FROM ( ) as lhs RIGHT JOIN ( SELECT * from right_table_no_nulls ORDER BY b + LIMIT 10 ) AS rhs ON lhs.b=rhs.b ---- logical_plan @@ -3918,16 +3920,17 @@ logical_plan 02)--SubqueryAlias: lhs 03)----TableScan: left_table_no_nulls projection=[a, b] 04)--SubqueryAlias: rhs -05)----Sort: right_table_no_nulls.b ASC NULLS LAST +05)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10 06)------TableScan: right_table_no_nulls projection=[a, b] physical_plan 01)CoalesceBatchesExec: target_batch_size=3 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)] 03)----MemoryExec: partitions=1, partition_sizes=[1] -04)----SortExec: expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] +04)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] 05)------MemoryExec: partitions=1, partition_sizes=[1] + # Missing probe index in the middle of the batch: statement ok CREATE TABLE left_table_missing_probe(a INT UNSIGNED, b INT UNSIGNED) @@ -3953,6 +3956,7 @@ SELECT * FROM ( ) as lhs RIGHT JOIN ( SELECT * from right_table_missing_probe ORDER BY b + LIMIT 10 ) AS rhs ON lhs.b=rhs.b ---- 11 1 21 1 @@ -3974,14 +3978,12 @@ logical_plan 02)--SubqueryAlias: lhs 03)----TableScan: left_table_no_nulls projection=[a, b] 04)--SubqueryAlias: rhs -05)----Sort: right_table_no_nulls.b ASC NULLS LAST -06)------TableScan: right_table_no_nulls projection=[a, b] +05)----TableScan: right_table_no_nulls projection=[a, b] physical_plan 01)CoalesceBatchesExec: target_batch_size=3 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)] 03)----MemoryExec: partitions=1, partition_sizes=[1] -04)----SortExec: expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] -05)------MemoryExec: partitions=1, partition_sizes=[1] +04)----MemoryExec: partitions=1, partition_sizes=[1] # Null build indices: @@ -4009,6 +4011,7 @@ SELECT * FROM ( ) as lhs RIGHT JOIN ( SELECT * from right_table_append_null_build ORDER BY b + LIMIT 10 ) AS rhs ON lhs.b=rhs.b ---- NULL NULL 21 4 @@ -4025,6 +4028,7 @@ 
EXPLAIN SELECT * FROM ( ) as lhs RIGHT JOIN ( SELECT * from right_table_no_nulls ORDER BY b + LIMIT 10 ) AS rhs ON lhs.b=rhs.b ---- logical_plan @@ -4032,11 +4036,11 @@ logical_plan 02)--SubqueryAlias: lhs 03)----TableScan: left_table_no_nulls projection=[a, b] 04)--SubqueryAlias: rhs -05)----Sort: right_table_no_nulls.b ASC NULLS LAST +05)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10 06)------TableScan: right_table_no_nulls projection=[a, b] physical_plan 01)CoalesceBatchesExec: target_batch_size=3 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)] 03)----MemoryExec: partitions=1, partition_sizes=[1] -04)----SortExec: expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] +04)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] 05)------MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 7341a9d43bac6..17bd398bd229e 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -550,5 +550,18 @@ logical_plan physical_plan StreamingTableExec: partition_sizes=1, projection=[column1, column2], infinite_source=true, fetch=3, output_ordering=[column1@0 ASC NULLS LAST, column2@1 ASC NULLS LAST] +# Do not remove limit with Sort when skip is used +query TT +explain SELECT * FROM data ORDER BY column1 LIMIT 3,3; +---- +logical_plan +01)Limit: skip=3, fetch=3 +02)--Sort: data.column1 ASC NULLS LAST, fetch=6 +03)----TableScan: data projection=[column1, column2] +physical_plan +01)GlobalLimitExec: skip=3, fetch=3 +02)--StreamingTableExec: partition_sizes=1, projection=[column1, column2], infinite_source=true, fetch=6, output_ordering=[column1@0 ASC NULLS LAST, column2@1 ASC NULLS LAST] + + statement ok drop table data; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index f0151417e5553..7bb872e5a48f5 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -998,17 +998,15 @@ ORDER BY c_str limit 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: c_str ASC NULLS LAST, fetch=5 -03)----Projection: CAST(ordered_table.c AS Utf8) AS c_str -04)------TableScan: ordered_table projection=[c] +01)Sort: c_str ASC NULLS LAST, fetch=5 +02)--Projection: CAST(ordered_table.c AS Utf8) AS c_str +03)----TableScan: ordered_table projection=[c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [c_str@0 ASC NULLS LAST], fetch=5 -03)----SortExec: TopK(fetch=5), expr=[c_str@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------ProjectionExec: expr=[CAST(c@0 AS Utf8) as c_str] -05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [c_str@0 ASC NULLS LAST], fetch=5 +02)--SortExec: TopK(fetch=5), expr=[c_str@0 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[CAST(c@0 AS Utf8) as c_str] +04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true # Casting from numeric to numeric types preserves the ordering @@ 
-1031,17 +1029,14 @@ ORDER BY c_bigint limit 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: c_bigint ASC NULLS LAST, fetch=5 -03)----Projection: CAST(ordered_table.c AS Int64) AS c_bigint -04)------TableScan: ordered_table projection=[c] +01)Sort: c_bigint ASC NULLS LAST, fetch=5 +02)--Projection: CAST(ordered_table.c AS Int64) AS c_bigint +03)----TableScan: ordered_table projection=[c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [c_bigint@0 ASC NULLS LAST], fetch=5 -03)----ProjectionExec: expr=[CAST(c@0 AS Int64) as c_bigint] -04)------LocalLimitExec: fetch=5 -05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [c_bigint@0 ASC NULLS LAST], fetch=5 +02)--ProjectionExec: expr=[CAST(c@0 AS Int64) as c_bigint] +03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true statement ok drop table ordered_table; @@ -1069,17 +1064,15 @@ ORDER BY abs_c limit 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: abs_c ASC NULLS LAST, fetch=5 -03)----Projection: abs(ordered_table.c) AS abs_c -04)------TableScan: ordered_table projection=[c] +01)Sort: abs_c ASC NULLS LAST, fetch=5 +02)--Projection: abs(ordered_table.c) AS abs_c +03)----TableScan: ordered_table projection=[c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], fetch=5 -03)----SortExec: TopK(fetch=5), expr=[abs_c@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------ProjectionExec: expr=[abs(c@0) as abs_c] -05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], fetch=5 +02)--SortExec: TopK(fetch=5), expr=[abs_c@0 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[abs(c@0) as abs_c] +04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true statement ok drop table ordered_table; @@ -1106,17 +1099,14 @@ ORDER BY abs_c limit 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: abs_c ASC NULLS LAST, fetch=5 -03)----Projection: abs(ordered_table.c) AS abs_c -04)------TableScan: ordered_table projection=[c] +01)Sort: abs_c ASC NULLS LAST, fetch=5 +02)--Projection: abs(ordered_table.c) AS abs_c +03)----TableScan: ordered_table projection=[c] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], fetch=5 -03)----ProjectionExec: expr=[abs(c@0) as abs_c] -04)------LocalLimitExec: fetch=5 -05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +01)SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], 
fetch=5 +02)--ProjectionExec: expr=[abs(c@0) as abs_c] +03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true # Boolean to integer casts preserve the order. statement ok @@ -1151,6 +1141,23 @@ SELECT (SELECT c from ordered_table ORDER BY c LIMIT 1) UNION ALL (SELECT 23 as 0 23 +# Do not increase partition number after fetch 1. As this will be unnecessary. +query TT +EXPLAIN SELECT a + b as sum1 FROM (SELECT a, b + FROM ordered_table + ORDER BY a ASC LIMIT 1 +); +---- +logical_plan +01)Projection: ordered_table.a + ordered_table.b AS sum1 +02)--Sort: ordered_table.a ASC NULLS LAST, fetch=1 +03)----TableScan: ordered_table projection=[a, b] +physical_plan +01)ProjectionExec: expr=[a@0 + b@1 as sum1] +02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +03)----SortExec: TopK(fetch=1), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true + statement ok set datafusion.execution.use_row_number_estimates_to_optimize_partitioning = true; @@ -1163,9 +1170,8 @@ EXPLAIN SELECT a + b as sum1 FROM (SELECT a, b ---- logical_plan 01)Projection: ordered_table.a + ordered_table.b AS sum1 -02)--Limit: skip=0, fetch=1 -03)----Sort: ordered_table.a ASC NULLS LAST, fetch=1 -04)------TableScan: ordered_table projection=[a, b] +02)--Sort: ordered_table.a ASC NULLS LAST, fetch=1 +03)----TableScan: ordered_table projection=[a, b] physical_plan 01)ProjectionExec: expr=[a@0 + b@1 as sum1] 02)--SortExec: TopK(fetch=1), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] @@ -1183,9 +1189,8 @@ EXPLAIN SELECT a + b as sum1 FROM (SELECT a, b ---- logical_plan 01)Projection: ordered_table.a + ordered_table.b AS sum1 -02)--Limit: skip=0, fetch=1 -03)----Sort: ordered_table.a ASC NULLS LAST, fetch=1 -04)------TableScan: ordered_table projection=[a, b] +02)--Sort: ordered_table.a ASC NULLS LAST, fetch=1 +03)----TableScan: ordered_table projection=[a, b] physical_plan 01)ProjectionExec: expr=[a@0 + b@1 as sum1] 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 49a18ca09de44..9832f97ae862b 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -990,13 +990,13 @@ FROM ( ) AS a ) AS b ---- -a 5 -101 -a 5 -54 a 5 -38 +a 5 -54 +a 6 36 +a 6 -31 a 5 65 +a 5 -101 a 6 -101 -a 6 -31 -a 6 36 # nested select without aliases query TII @@ -1011,13 +1011,13 @@ FROM ( ) ) ---- -a 5 -101 -a 5 -54 a 5 -38 +a 5 -54 +a 6 36 +a 6 -31 a 5 65 +a 5 -101 a 6 -101 -a 6 -31 -a 6 36 # select with join unaliased subqueries query TIITII @@ -1118,9 +1118,8 @@ EXPLAIN SELECT a FROM annotated_data_finite2 LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5 -03)----TableScan: annotated_data_finite2 projection=[a] +01)Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5 +02)--TableScan: annotated_data_finite2 projection=[a] physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], limit=5, output_ordering=[a@0 ASC NULLS LAST], has_header=true query I @@ -1461,13 +1460,14 @@ query TT 
EXPLAIN SELECT c2, COUNT(*) FROM (SELECT c2 FROM aggregate_test_100 -ORDER BY c1, c2) +ORDER BY c1, c2 +LIMIT 4) GROUP BY c2; ---- logical_plan 01)Aggregate: groupBy=[[aggregate_test_100.c2]], aggr=[[count(Int64(1)) AS count(*)]] 02)--Projection: aggregate_test_100.c2 -03)----Sort: aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST +03)----Sort: aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST, fetch=4 04)------Projection: aggregate_test_100.c2, aggregate_test_100.c1 05)--------TableScan: aggregate_test_100 projection=[c1, c2] physical_plan @@ -1476,7 +1476,9 @@ physical_plan 03)----RepartitionExec: partitioning=Hash([c2@0], 2), input_partitions=2 04)------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[count(*)] 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2], has_header=true +06)----------ProjectionExec: expr=[c2@0 as c2] +07)------------SortExec: TopK(fetch=4), expr=[c1@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c1], has_header=true # FilterExec can track equality of non-column expressions. # plan below shouldn't have a SortExec because given column 'a' is ordered. diff --git a/datafusion/sqllogictest/test_files/subquery_sort.slt b/datafusion/sqllogictest/test_files/subquery_sort.slt new file mode 100644 index 0000000000000..17affbc0acadc --- /dev/null +++ b/datafusion/sqllogictest/test_files/subquery_sort.slt @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +statement ok +CREATE EXTERNAL TABLE sink_table ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT NOT NULL, + c5 INTEGER NOT NULL, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 INT UNSIGNED NOT NULL, + c10 BIGINT UNSIGNED NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL + ) +STORED AS CSV +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); + +# Remove the redundant ordering in the subquery + +query TT +EXPLAIN SELECT c1 FROM (SELECT c1 FROM sink_table ORDER BY c2) AS ttt +---- +logical_plan +01)SubqueryAlias: ttt +02)--TableScan: sink_table projection=[c1] +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true + +query TT +EXPLAIN SELECT c1 FROM (SELECT c1 FROM sink_table ORDER BY c2) +---- +logical_plan TableScan: sink_table projection=[c1] +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], has_header=true + + +# Do not remove ordering when it's with limit + +query TT +EXPLAIN SELECT c1, c2 FROM (SELECT c1, c2, c3, c9 FROM sink_table ORDER BY c1 DESC, c3 LIMIT 2) AS t2 ORDER BY t2.c1, t2.c3, t2.c9; +---- +logical_plan +01)Projection: t2.c1, t2.c2 +02)--Sort: t2.c1 ASC NULLS LAST, t2.c3 ASC NULLS LAST, t2.c9 ASC NULLS LAST +03)----SubqueryAlias: t2 +04)------Sort: sink_table.c1 DESC NULLS FIRST, sink_table.c3 ASC NULLS LAST, fetch=2 +05)--------TableScan: sink_table projection=[c1, c2, c3, c9] +physical_plan +01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] +02)--SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=2), expr=[c1@0 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[false] +04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c9], has_header=true + + +query TI +SELECT c1, c2 FROM (SELECT c1, c2, c3, c9 FROM sink_table ORDER BY c1, c3 LIMIT 2) AS t2 ORDER BY t2.c1, t2.c3, t2.c9; +---- +a 4 +a 5 + +query TI +SELECT c1, c2 FROM (SELECT c1, c2, c3, c9 FROM sink_table ORDER BY c1 DESC, c3 LIMIT 2) AS t2 ORDER BY t2.c1, t2.c3, t2.c9; +---- +e 3 +e 5 + + +# Do not remove ordering when it's a part of an aggregation in subquery + +query TT +EXPLAIN SELECT t2.c1, t2.r FROM (SELECT c1, RANK() OVER (ORDER BY c1 DESC) AS r, c3, c9 FROM sink_table ORDER BY c1, c3 LIMIT 2) AS t2 ORDER BY t2.c1, t2.c3, t2.c9; +---- +logical_plan +01)Projection: t2.c1, t2.r +02)--Sort: t2.c1 ASC NULLS LAST, t2.c3 ASC NULLS LAST, t2.c9 ASC NULLS LAST +03)----SubqueryAlias: t2 +04)------Sort: sink_table.c1 ASC NULLS LAST, sink_table.c3 ASC NULLS LAST, fetch=2 +05)--------Projection: sink_table.c1, RANK() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS r, sink_table.c3, sink_table.c9 +06)----------WindowAggr: windowExpr=[[RANK() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +07)------------TableScan: sink_table projection=[c1, c3, c9] +physical_plan +01)ProjectionExec: expr=[c1@0 as c1, r@1 as r] +02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] +03)----ProjectionExec: expr=[c1@0 as c1, RANK() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as r, c3@1 as 
c3, c9@2 as c9] +04)------BoundedWindowAggExec: wdw=[RANK() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "RANK() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Utf8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------SortExec: expr=[c1@0 DESC], preserve_partitioning=[false] +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3, c9], has_header=true + + +query TT +EXPLAIN SELECT c1, c2 FROM (SELECT DISTINCT ON (c1) c1, c2, c3, c9 FROM sink_table ORDER BY c1, c3 DESC, c9) AS t2 ORDER BY t2.c1, t2.c3 DESC, t2.c9 +---- +logical_plan +01)Projection: t2.c1, t2.c2 +02)--Sort: t2.c1 ASC NULLS LAST, t2.c3 DESC NULLS FIRST, t2.c9 ASC NULLS LAST +03)----SubqueryAlias: t2 +04)------Projection: first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST] AS c1, first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST] AS c2, first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST] AS c3, first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST] AS c9 +05)--------Sort: sink_table.c1 ASC NULLS LAST +06)----------Aggregate: groupBy=[[sink_table.c1]], aggr=[[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]]] +07)------------TableScan: sink_table projection=[c1, c2, c3, c9] +physical_plan +01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] +02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST] +03)----SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST], preserve_partitioning=[true] +04)------ProjectionExec: expr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@1 as c1, first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@2 as c2, first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@3 as c3, first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@4 as c9] +05)--------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, 
sink_table.c9 ASC NULLS LAST], first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]] +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4 +08)--------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]] +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c9], has_header=true + + +query TI +SELECT c1, c2 FROM (SELECT DISTINCT ON (c1) c1, c2, c3, c9 FROM sink_table ORDER BY c1, c3, c9) AS t2 ORDER BY t2.c1, t2.c3, t2.c9; +---- +a 4 +b 4 +c 2 +d 1 +e 3 + + +query TI +SELECT c1, c2 FROM (SELECT DISTINCT ON (c1) c1, c2, c3, c9 FROM sink_table ORDER BY c1, c3 DESC, c9) AS t2 ORDER BY t2.c1, t2.c3 DESC, t2.c9 +---- +a 1 +b 5 +c 4 +d 1 +e 1 diff --git a/datafusion/sqllogictest/test_files/topk.slt b/datafusion/sqllogictest/test_files/topk.slt index c38b9d1f1a39f..53f4ffe4edf58 100644 --- a/datafusion/sqllogictest/test_files/topk.slt +++ b/datafusion/sqllogictest/test_files/topk.slt @@ -76,9 +76,8 @@ query TT explain select * from aggregate_test_100 ORDER BY c13 desc limit 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: aggregate_test_100.c13 DESC NULLS FIRST, fetch=5 -03)----TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13] +01)Sort: aggregate_test_100.c13 DESC NULLS FIRST, fetch=5 +02)--TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13] physical_plan 01)SortExec: TopK(fetch=5), expr=[c13@12 DESC], preserve_partitioning=[false] 02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true diff --git a/datafusion/sqllogictest/test_files/tpch/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/q10.slt.part index 37a9d17229707..23ae70d7ec976 100644 --- a/datafusion/sqllogictest/test_files/tpch/q10.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q10.slt.part @@ -51,63 +51,61 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: revenue DESC NULLS FIRST, fetch=10 -03)----Projection: customer.c_custkey, customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment -04)------Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] -05)--------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, 
customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name -06)----------Inner Join: customer.c_nationkey = nation.n_nationkey -07)------------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount -08)--------------Inner Join: orders.o_orderkey = lineitem.l_orderkey -09)----------------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey -10)------------------Inner Join: customer.c_custkey = orders.o_custkey -11)--------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] -12)--------------------Projection: orders.o_orderkey, orders.o_custkey -13)----------------------Filter: orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01") -14)------------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01")] -15)----------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount -16)------------------Filter: lineitem.l_returnflag = Utf8("R") -17)--------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")] -18)------------TableScan: nation projection=[n_nationkey, n_name] +01)Sort: revenue DESC NULLS FIRST, fetch=10 +02)--Projection: customer.c_custkey, customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment +03)----Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +04)------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name +05)--------Inner Join: customer.c_nationkey = nation.n_nationkey +06)----------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount +07)------------Inner Join: orders.o_orderkey = lineitem.l_orderkey +08)--------------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey +09)----------------Inner Join: customer.c_custkey = orders.o_custkey +10)------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] +11)------------------Projection: orders.o_orderkey, orders.o_custkey +12)--------------------Filter: orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01") +13)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01")] +14)--------------Projection: 
lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount +15)----------------Filter: lineitem.l_returnflag = Utf8("R") +16)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")] +17)----------TableScan: nation projection=[n_nationkey, n_name] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [revenue@2 DESC], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true] -04)------ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] -05)--------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -09)----------------CoalesceBatchesExec: target_batch_size=8192 -10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_nationkey@3, n_nationkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@7, l_discount@8, n_name@10] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] -15)----------------------------CoalesceBatchesExec: target_batch_size=8192 -16)------------------------------RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 -17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 -18)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] -19)------------------------------------CoalesceBatchesExec: target_batch_size=8192 -20)--------------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 -21)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -22)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false -23)------------------------------------CoalesceBatchesExec: 
target_batch_size=8192 -24)--------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 -25)----------------------------------------ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] -26)------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -27)--------------------------------------------FilterExec: o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01 -28)----------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false -29)----------------------------CoalesceBatchesExec: target_batch_size=8192 -30)------------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -31)--------------------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] -32)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -33)------------------------------------FilterExec: l_returnflag@3 = R -34)--------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false -35)--------------------CoalesceBatchesExec: target_batch_size=8192 -36)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -37)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -38)--------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +01)SortPreservingMergeExec: [revenue@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] +04)------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as 
c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_nationkey@3, n_nationkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@7, l_discount@8, n_name@10] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] +14)--------------------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------------------RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 +16)------------------------------CoalesceBatchesExec: target_batch_size=8192 +17)--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] +18)----------------------------------CoalesceBatchesExec: target_batch_size=8192 +19)------------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +20)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +21)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false +22)----------------------------------CoalesceBatchesExec: target_batch_size=8192 +23)------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 +24)--------------------------------------ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] +25)----------------------------------------CoalesceBatchesExec: target_batch_size=8192 +26)------------------------------------------FilterExec: o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01 +27)--------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false +28)--------------------------CoalesceBatchesExec: target_batch_size=8192 +29)----------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +30)------------------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +31)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +32)----------------------------------FilterExec: l_returnflag@3 = R +33)------------------------------------CsvExec: file_groups={4 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false +34)------------------CoalesceBatchesExec: target_batch_size=8192 +35)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +36)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +37)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/q11.slt.part index ce989ee33ebdc..0d66b2f2f2a9b 100644 --- a/datafusion/sqllogictest/test_files/tpch/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q11.slt.part @@ -47,32 +47,31 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: value DESC NULLS FIRST, fetch=10 -03)----Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value -04)------Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) -05)--------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] -06)----------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost -07)------------Inner Join: supplier.s_nationkey = nation.n_nationkey -08)--------------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey -09)----------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey -10)------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] -11)------------------TableScan: supplier projection=[s_suppkey, s_nationkey] -12)--------------Projection: nation.n_nationkey -13)----------------Filter: nation.n_name = Utf8("GERMANY") -14)------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] -15)--------SubqueryAlias: __scalar_sq_1 -16)----------Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) -17)------------Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] -18)--------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost -19)----------------Inner Join: supplier.s_nationkey = nation.n_nationkey -20)------------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey -21)--------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey -22)----------------------TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] -23)----------------------TableScan: supplier projection=[s_suppkey, s_nationkey] -24)------------------Projection: nation.n_nationkey -25)--------------------Filter: nation.n_name = Utf8("GERMANY") -26)----------------------TableScan: nation projection=[n_nationkey, n_name], 
partial_filters=[nation.n_name = Utf8("GERMANY")] +01)Sort: value DESC NULLS FIRST, fetch=10 +02)--Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value +03)----Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) +04)------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] +05)--------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost +06)----------Inner Join: supplier.s_nationkey = nation.n_nationkey +07)------------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey +08)--------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +09)----------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] +10)----------------TableScan: supplier projection=[s_suppkey, s_nationkey] +11)------------Projection: nation.n_nationkey +12)--------------Filter: nation.n_name = Utf8("GERMANY") +13)----------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] +14)------SubqueryAlias: __scalar_sq_1 +15)--------Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) +16)----------Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] +17)------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost +18)--------------Inner Join: supplier.s_nationkey = nation.n_nationkey +19)----------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey +20)------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +21)--------------------TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] +22)--------------------TableScan: supplier projection=[s_suppkey, s_nationkey] +23)----------------Projection: nation.n_nationkey +24)------------------Filter: nation.n_name = Utf8("GERMANY") +25)--------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] physical_plan 01)SortExec: TopK(fetch=10), expr=[value@1 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] diff --git a/datafusion/sqllogictest/test_files/tpch/q13.slt.part b/datafusion/sqllogictest/test_files/tpch/q13.slt.part index f25f23de88179..011bd761d760f 100644 --- a/datafusion/sqllogictest/test_files/tpch/q13.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q13.slt.part @@ -40,42 +40,40 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST, fetch=10 -03)----Projection: c_orders.c_count, count(*) AS custdist -04)------Aggregate: groupBy=[[c_orders.c_count]], aggr=[[count(Int64(1)) AS count(*)]] -05)--------SubqueryAlias: c_orders -06)----------Projection: count(orders.o_orderkey) AS c_count -07)------------Aggregate: groupBy=[[customer.c_custkey]], aggr=[[count(orders.o_orderkey)]] -08)--------------Projection: customer.c_custkey, orders.o_orderkey -09)----------------Left Join: customer.c_custkey = orders.o_custkey -10)------------------TableScan: customer projection=[c_custkey] -11)------------------Projection: 
orders.o_orderkey, orders.o_custkey -12)--------------------Filter: orders.o_comment NOT LIKE Utf8("%special%requests%") -13)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%special%requests%")] +01)Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST, fetch=10 +02)--Projection: c_orders.c_count, count(*) AS custdist +03)----Aggregate: groupBy=[[c_orders.c_count]], aggr=[[count(Int64(1)) AS count(*)]] +04)------SubqueryAlias: c_orders +05)--------Projection: count(orders.o_orderkey) AS c_count +06)----------Aggregate: groupBy=[[customer.c_custkey]], aggr=[[count(orders.o_orderkey)]] +07)------------Projection: customer.c_custkey, orders.o_orderkey +08)--------------Left Join: customer.c_custkey = orders.o_custkey +09)----------------TableScan: customer projection=[c_custkey] +10)----------------Projection: orders.o_orderkey, orders.o_custkey +11)------------------Filter: orders.o_comment NOT LIKE Utf8("%special%requests%") +12)--------------------TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%special%requests%")] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] -04)------ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] -05)--------AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] -09)----------------ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] -10)------------------AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 -15)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -16)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey], has_header=false -17)------------------------CoalesceBatchesExec: target_batch_size=8192 -18)--------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 -19)----------------------------ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] -20)------------------------------CoalesceBatchesExec: target_batch_size=8192 -21)--------------------------------FilterExec: o_comment@2 NOT LIKE %special%requests% -22)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_comment], has_header=false +01)SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] +04)------AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] +08)--------------ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] +09)----------------AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +14)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey], has_header=false +16)----------------------CoalesceBatchesExec: target_batch_size=8192 +17)------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 +18)--------------------------ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] +19)----------------------------CoalesceBatchesExec: target_batch_size=8192 +20)------------------------------FilterExec: o_comment@2 NOT LIKE %special%requests% +21)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_comment], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q16.slt.part b/datafusion/sqllogictest/test_files/tpch/q16.slt.part index d568b2ca69e6e..b847db14f2b2b 100644 --- a/datafusion/sqllogictest/test_files/tpch/q16.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q16.slt.part @@ -50,56 +50,54 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST, fetch=10 -03)----Projection: part.p_brand, part.p_type, part.p_size, count(alias1) AS supplier_cnt -04)------Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size]], aggr=[[count(alias1)]] -05)--------Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size, partsupp.ps_suppkey AS alias1]], aggr=[[]] -06)----------LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey -07)------------Projection: partsupp.ps_suppkey, part.p_brand, part.p_type, part.p_size -08)--------------Inner Join: partsupp.ps_partkey = 
part.p_partkey -09)----------------TableScan: partsupp projection=[ps_partkey, ps_suppkey] -10)----------------Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)]) -11)------------------TableScan: part projection=[p_partkey, p_brand, p_type, p_size], partial_filters=[part.p_brand != Utf8("Brand#45"), part.p_type NOT LIKE Utf8("MEDIUM POLISHED%"), part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])] -12)------------SubqueryAlias: __correlated_sq_1 -13)--------------Projection: supplier.s_suppkey -14)----------------Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") -15)------------------TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] +01)Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST, fetch=10 +02)--Projection: part.p_brand, part.p_type, part.p_size, count(alias1) AS supplier_cnt +03)----Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size]], aggr=[[count(alias1)]] +04)------Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size, partsupp.ps_suppkey AS alias1]], aggr=[[]] +05)--------LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey +06)----------Projection: partsupp.ps_suppkey, part.p_brand, part.p_type, part.p_size +07)------------Inner Join: partsupp.ps_partkey = part.p_partkey +08)--------------TableScan: partsupp projection=[ps_partkey, ps_suppkey] +09)--------------Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)]) +10)----------------TableScan: part projection=[p_partkey, p_brand, p_type, p_size], partial_filters=[part.p_brand != Utf8("Brand#45"), part.p_type NOT LIKE Utf8("MEDIUM POLISHED%"), part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])] +11)----------SubqueryAlias: __correlated_sq_1 +12)------------Projection: supplier.s_suppkey +13)--------------Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") +14)----------------TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] -04)------ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] -05)--------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] -09)----------------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], 
aggr=[] -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 -12)----------------------AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(ps_suppkey@0, s_suppkey@0)] -15)----------------------------CoalesceBatchesExec: target_batch_size=8192 -16)------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 -17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 -18)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, p_partkey@0)], projection=[ps_suppkey@1, p_brand@3, p_type@4, p_size@5] -19)------------------------------------CoalesceBatchesExec: target_batch_size=8192 -20)--------------------------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -21)----------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey], has_header=false -22)------------------------------------CoalesceBatchesExec: target_batch_size=8192 -23)--------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -24)----------------------------------------CoalesceBatchesExec: target_batch_size=8192 -25)------------------------------------------FilterExec: p_brand@1 != Brand#45 AND p_type@2 NOT LIKE MEDIUM POLISHED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(49) }, Literal { value: Int32(14) }, Literal { value: Int32(23) }, Literal { value: Int32(45) }, Literal { value: Int32(19) }, Literal { value: Int32(3) }, Literal { value: Int32(36) }, Literal { value: Int32(9) }]) -26)--------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -27)----------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_type, p_size], has_header=false -28)----------------------------CoalesceBatchesExec: target_batch_size=8192 -29)------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -30)--------------------------------ProjectionExec: expr=[s_suppkey@0 as s_suppkey] -31)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -32)------------------------------------FilterExec: s_comment@1 LIKE %Customer%Complaints% -33)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -34)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_comment], has_header=false +01)SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS 
LAST,p_size@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] +04)------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] +08)--------------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] +09)----------------CoalesceBatchesExec: target_batch_size=8192 +10)------------------RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 +11)--------------------AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(ps_suppkey@0, s_suppkey@0)] +14)--------------------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 +16)------------------------------CoalesceBatchesExec: target_batch_size=8192 +17)--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, p_partkey@0)], projection=[ps_suppkey@1, p_brand@3, p_type@4, p_size@5] +18)----------------------------------CoalesceBatchesExec: target_batch_size=8192 +19)------------------------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 +20)--------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey], has_header=false +21)----------------------------------CoalesceBatchesExec: target_batch_size=8192 +22)------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +23)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 +24)----------------------------------------FilterExec: p_brand@1 != Brand#45 AND p_type@2 NOT LIKE MEDIUM POLISHED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(49) }, Literal { value: Int32(14) }, Literal { value: Int32(23) }, Literal { value: Int32(45) }, Literal { value: Int32(19) }, Literal { value: Int32(3) }, Literal { value: Int32(36) }, Literal { value: Int32(9) }]) +25)------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +26)--------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_type, p_size], 
has_header=false +27)--------------------------CoalesceBatchesExec: target_batch_size=8192 +28)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +29)------------------------------ProjectionExec: expr=[s_suppkey@0 as s_suppkey] +30)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +31)----------------------------------FilterExec: s_comment@1 LIKE %Customer%Complaints% +32)------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +33)--------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_comment], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q2.slt.part b/datafusion/sqllogictest/test_files/tpch/q2.slt.part index 85dfefcd03f46..223a011c9e37a 100644 --- a/datafusion/sqllogictest/test_files/tpch/q2.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q2.slt.part @@ -63,126 +63,124 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=10 -03)----Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment -04)------Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) -05)--------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name -06)----------Inner Join: nation.n_regionkey = region.r_regionkey -07)------------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey -08)--------------Inner Join: supplier.s_nationkey = nation.n_nationkey -09)----------------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost -10)------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey -11)--------------------Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost -12)----------------------Inner Join: part.p_partkey = partsupp.ps_partkey -13)------------------------Projection: part.p_partkey, part.p_mfgr -14)--------------------------Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS") -15)----------------------------TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")] -16)------------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] -17)--------------------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] -18)----------------TableScan: nation projection=[n_nationkey, n_name, n_regionkey] -19)------------Projection: region.r_regionkey -20)--------------Filter: region.r_name = Utf8("EUROPE") -21)----------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] -22)--------SubqueryAlias: __scalar_sq_1 -23)----------Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey 
-24)------------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] -25)--------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost -26)----------------Inner Join: nation.n_regionkey = region.r_regionkey -27)------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey -28)--------------------Inner Join: supplier.s_nationkey = nation.n_nationkey -29)----------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey -30)------------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey -31)--------------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] -32)--------------------------TableScan: supplier projection=[s_suppkey, s_nationkey] -33)----------------------TableScan: nation projection=[n_nationkey, n_regionkey] -34)------------------Projection: region.r_regionkey -35)--------------------Filter: region.r_name = Utf8("EUROPE") -36)----------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] +01)Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=10 +02)--Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment +03)----Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) +04)------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name +05)--------Inner Join: nation.n_regionkey = region.r_regionkey +06)----------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey +07)------------Inner Join: supplier.s_nationkey = nation.n_nationkey +08)--------------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost +09)----------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +10)------------------Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost +11)--------------------Inner Join: part.p_partkey = partsupp.ps_partkey +12)----------------------Projection: part.p_partkey, part.p_mfgr +13)------------------------Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS") +14)--------------------------TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")] +15)----------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] +16)------------------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] +17)--------------TableScan: nation projection=[n_nationkey, n_name, n_regionkey] +18)----------Projection: region.r_regionkey +19)------------Filter: region.r_name = Utf8("EUROPE") +20)--------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] +21)------SubqueryAlias: __scalar_sq_1 +22)--------Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey +23)----------Aggregate: 
groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] +24)------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost +25)--------------Inner Join: nation.n_regionkey = region.r_regionkey +26)----------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey +27)------------------Inner Join: supplier.s_nationkey = nation.n_nationkey +28)--------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey +29)----------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +30)------------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] +31)------------------------TableScan: supplier projection=[s_suppkey, s_nationkey] +32)--------------------TableScan: nation projection=[n_nationkey, n_regionkey] +33)----------------Projection: region.r_regionkey +34)------------------Filter: region.r_name = Utf8("EUROPE") +35)--------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] -04)------ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] -07)------------CoalesceBatchesExec: target_batch_size=8192 -08)--------------RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 -09)----------------CoalesceBatchesExec: target_batch_size=8192 -10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@9, r_regionkey@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, ps_supplycost@7, n_name@8] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@4, n_nationkey@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@10, n_regionkey@11] -15)----------------------------CoalesceBatchesExec: target_batch_size=8192 -16)------------------------------RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 -17)--------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_name@3 as s_name, s_address@4 as s_address, s_nationkey@5 as s_nationkey, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@2 as ps_supplycost] -18)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -19)------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(ps_suppkey@2, s_suppkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_supplycost@3, s_name@5, s_address@6, s_nationkey@7, s_phone@8, s_acctbal@9, s_comment@10] -20)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 -21)----------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 -22)------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -23)--------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] -24)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -25)------------------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -26)--------------------------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] -27)----------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -28)------------------------------------------------------FilterExec: p_size@3 = 15 AND p_type@2 LIKE %BRASS -29)--------------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -30)----------------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_mfgr, p_type, p_size], has_header=false -31)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -32)------------------------------------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -33)--------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false -34)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 -35)----------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -36)------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -37)--------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment], has_header=false -38)----------------------------CoalesceBatchesExec: target_batch_size=8192 -39)------------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -40)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -41)----------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name, n_regionkey], has_header=false -42)--------------------CoalesceBatchesExec: target_batch_size=8192 -43)----------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), 
input_partitions=4 -44)------------------------ProjectionExec: expr=[r_regionkey@0 as r_regionkey] -45)--------------------------CoalesceBatchesExec: target_batch_size=8192 -46)----------------------------FilterExec: r_name@1 = EUROPE -47)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -48)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false -49)------------CoalesceBatchesExec: target_batch_size=8192 -50)--------------RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 -51)----------------ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] -52)------------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] -53)--------------------CoalesceBatchesExec: target_batch_size=8192 -54)----------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -55)------------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] -56)--------------------------CoalesceBatchesExec: target_batch_size=8192 -57)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@2, r_regionkey@0)], projection=[ps_partkey@0, ps_supplycost@1] -58)------------------------------CoalesceBatchesExec: target_batch_size=8192 -59)--------------------------------RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 -60)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -61)------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_partkey@0, ps_supplycost@1, n_regionkey@4] -62)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 -63)----------------------------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 -64)------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -65)--------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_supplycost@2, s_nationkey@4] -66)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -67)------------------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 -68)--------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false -69)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -70)------------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -71)--------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 
-72)----------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -73)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 -74)----------------------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -75)------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -76)--------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_regionkey], has_header=false -77)------------------------------CoalesceBatchesExec: target_batch_size=8192 -78)--------------------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 -79)----------------------------------ProjectionExec: expr=[r_regionkey@0 as r_regionkey] -80)------------------------------------CoalesceBatchesExec: target_batch_size=8192 -81)--------------------------------------FilterExec: r_name@1 = EUROPE -82)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -83)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false +01)SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] +04)------CoalesceBatchesExec: target_batch_size=8192 +05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@9, r_regionkey@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, ps_supplycost@7, n_name@8] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@4, n_nationkey@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@10, n_regionkey@11] +14)--------------------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------------------RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 +16)------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_name@3 
as s_name, s_address@4 as s_address, s_nationkey@5 as s_nationkey, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@2 as ps_supplycost] +17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +18)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@2, s_suppkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_supplycost@3, s_name@5, s_address@6, s_nationkey@7, s_phone@8, s_acctbal@9, s_comment@10] +19)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +20)--------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 +21)----------------------------------------CoalesceBatchesExec: target_batch_size=8192 +22)------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] +23)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +24)----------------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +25)------------------------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] +26)--------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +27)----------------------------------------------------FilterExec: p_size@3 = 15 AND p_type@2 LIKE %BRASS +28)------------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +29)--------------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_mfgr, p_type, p_size], has_header=false +30)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +31)----------------------------------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 +32)------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false +33)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +34)--------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +35)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +36)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment], has_header=false +37)--------------------------CoalesceBatchesExec: target_batch_size=8192 +38)----------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +39)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +40)--------------------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name, n_regionkey], has_header=false +41)------------------CoalesceBatchesExec: target_batch_size=8192 +42)--------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 +43)----------------------ProjectionExec: expr=[r_regionkey@0 as r_regionkey] +44)------------------------CoalesceBatchesExec: target_batch_size=8192 +45)--------------------------FilterExec: r_name@1 = EUROPE +46)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +47)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false +48)----------CoalesceBatchesExec: target_batch_size=8192 +49)------------RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 +50)--------------ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] +51)----------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] +52)------------------CoalesceBatchesExec: target_batch_size=8192 +53)--------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 +54)----------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] +55)------------------------CoalesceBatchesExec: target_batch_size=8192 +56)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@2, r_regionkey@0)], projection=[ps_partkey@0, ps_supplycost@1] +57)----------------------------CoalesceBatchesExec: target_batch_size=8192 +58)------------------------------RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 +59)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +60)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_partkey@0, ps_supplycost@1, n_regionkey@4] +61)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +62)--------------------------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 +63)----------------------------------------CoalesceBatchesExec: target_batch_size=8192 +64)------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_supplycost@2, s_nationkey@4] +65)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +66)----------------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 +67)------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false +68)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 
+69)----------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +70)------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +71)--------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +72)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +73)--------------------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +74)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +75)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_regionkey], has_header=false +76)----------------------------CoalesceBatchesExec: target_batch_size=8192 +77)------------------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 +78)--------------------------------ProjectionExec: expr=[r_regionkey@0 as r_regionkey] +79)----------------------------------CoalesceBatchesExec: target_batch_size=8192 +80)------------------------------------FilterExec: r_name@1 = EUROPE +81)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +82)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q3.slt.part b/datafusion/sqllogictest/test_files/tpch/q3.slt.part index d0f1a01cac193..1a8512372d370 100644 --- a/datafusion/sqllogictest/test_files/tpch/q3.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q3.slt.part @@ -42,55 +42,53 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 -03)----Projection: lineitem.l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority -04)------Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] -05)--------Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount -06)----------Inner Join: orders.o_orderkey = lineitem.l_orderkey -07)------------Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority -08)--------------Inner Join: customer.c_custkey = orders.o_custkey -09)----------------Projection: customer.c_custkey -10)------------------Filter: customer.c_mktsegment = Utf8("BUILDING") -11)--------------------TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = Utf8("BUILDING")] -12)----------------Filter: orders.o_orderdate < Date32("1995-03-15") -13)------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] -14)------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount 
-15)--------------Filter: lineitem.l_shipdate > Date32("1995-03-15") -16)----------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] +01)Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 +02)--Projection: lineitem.l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority +03)----Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +04)------Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount +05)--------Inner Join: orders.o_orderkey = lineitem.l_orderkey +06)----------Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority +07)------------Inner Join: customer.c_custkey = orders.o_custkey +08)--------------Projection: customer.c_custkey +09)----------------Filter: customer.c_mktsegment = Utf8("BUILDING") +10)------------------TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = Utf8("BUILDING")] +11)--------------Filter: orders.o_orderdate < Date32("1995-03-15") +12)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] +13)----------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount +14)------------Filter: lineitem.l_shipdate > Date32("1995-03-15") +15)--------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] -04)------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] -05)--------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -09)----------------CoalesceBatchesExec: target_batch_size=8192 -10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] -15)----------------------------CoalesceBatchesExec: target_batch_size=8192 -16)------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 -17)--------------------------------ProjectionExec: expr=[c_custkey@0 as c_custkey] -18)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -19)------------------------------------FilterExec: c_mktsegment@1 = BUILDING -20)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -21)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_mktsegment], has_header=false -22)----------------------------CoalesceBatchesExec: target_batch_size=8192 -23)------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 -24)--------------------------------CoalesceBatchesExec: target_batch_size=8192 -25)----------------------------------FilterExec: o_orderdate@2 < 1995-03-15 -26)------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], has_header=false -27)--------------------CoalesceBatchesExec: target_batch_size=8192 -28)----------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -29)------------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] -30)--------------------------CoalesceBatchesExec: target_batch_size=8192 -31)----------------------------FilterExec: l_shipdate@3 > 1995-03-15 -32)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], has_header=false +01)SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] +04)------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, 
o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] +14)--------------------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +16)------------------------------ProjectionExec: expr=[c_custkey@0 as c_custkey] +17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +18)----------------------------------FilterExec: c_mktsegment@1 = BUILDING +19)------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +20)--------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_mktsegment], has_header=false +21)--------------------------CoalesceBatchesExec: target_batch_size=8192 +22)----------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 +23)------------------------------CoalesceBatchesExec: target_batch_size=8192 +24)--------------------------------FilterExec: o_orderdate@2 < 1995-03-15 +25)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], has_header=false +26)------------------CoalesceBatchesExec: target_batch_size=8192 +27)--------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +28)----------------------ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +29)------------------------CoalesceBatchesExec: target_batch_size=8192 +30)--------------------------FilterExec: l_shipdate@3 > 1995-03-15 +31)----------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/q9.slt.part b/datafusion/sqllogictest/test_files/tpch/q9.slt.part index e49cddced50fe..a3fe2e3c675b3 100644 --- 
a/datafusion/sqllogictest/test_files/tpch/q9.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q9.slt.part @@ -52,81 +52,79 @@ order by limit 10; ---- logical_plan -01)Limit: skip=0, fetch=10 -02)--Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST, fetch=10 -03)----Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit -04)------Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] -05)--------SubqueryAlias: profit -06)----------Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount -07)------------Inner Join: supplier.s_nationkey = nation.n_nationkey -08)--------------Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate -09)----------------Inner Join: lineitem.l_orderkey = orders.o_orderkey -10)------------------Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost -11)--------------------Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey -12)----------------------Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey -13)------------------------Inner Join: lineitem.l_suppkey = supplier.s_suppkey -14)--------------------------Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount -15)----------------------------Inner Join: part.p_partkey = lineitem.l_partkey -16)------------------------------Projection: part.p_partkey -17)--------------------------------Filter: part.p_name LIKE Utf8("%green%") -18)----------------------------------TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("%green%")] -19)------------------------------TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] -20)--------------------------TableScan: supplier projection=[s_suppkey, s_nationkey] -21)----------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] -22)------------------TableScan: orders projection=[o_orderkey, o_orderdate] -23)--------------TableScan: nation projection=[n_nationkey, n_name] +01)Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST, fetch=10 +02)--Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit +03)----Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] +04)------SubqueryAlias: profit +05)--------Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount +06)----------Inner Join: supplier.s_nationkey = nation.n_nationkey +07)------------Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate +08)--------------Inner Join: lineitem.l_orderkey = orders.o_orderkey +09)----------------Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, 
partsupp.ps_supplycost +10)------------------Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey +11)--------------------Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey +12)----------------------Inner Join: lineitem.l_suppkey = supplier.s_suppkey +13)------------------------Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount +14)--------------------------Inner Join: part.p_partkey = lineitem.l_partkey +15)----------------------------Projection: part.p_partkey +16)------------------------------Filter: part.p_name LIKE Utf8("%green%") +17)--------------------------------TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("%green%")] +18)----------------------------TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] +19)------------------------TableScan: supplier projection=[s_suppkey, s_nationkey] +20)--------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] +21)----------------TableScan: orders projection=[o_orderkey, o_orderdate] +22)------------TableScan: nation projection=[n_nationkey, n_name] physical_plan -01)GlobalLimitExec: skip=0, fetch=10 -02)--SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC], fetch=10 -03)----SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] -04)------ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] -05)--------AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] -09)----------------ProjectionExec: expr=[n_name@5 as nation, date_part(YEAR, o_orderdate@4) as o_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) - ps_supplycost@3 * l_quantity@0 as amount] -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[l_quantity@0, l_extendedprice@1, l_discount@2, ps_supplycost@4, o_orderdate@5, n_name@7] -12)----------------------CoalesceBatchesExec: target_batch_size=8192 -13)------------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 -14)--------------------------CoalesceBatchesExec: target_batch_size=8192 -15)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_quantity@1, l_extendedprice@2, l_discount@3, s_nationkey@4, ps_supplycost@5, o_orderdate@7] -16)------------------------------CoalesceBatchesExec: target_batch_size=8192 -17)--------------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -18)----------------------------------CoalesceBatchesExec: target_batch_size=8192 -19)------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_suppkey@2, ps_suppkey@1), (l_partkey@1, ps_partkey@0)], projection=[l_orderkey@0, l_quantity@3, l_extendedprice@4, 
l_discount@5, s_nationkey@6, ps_supplycost@9] -20)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 -21)----------------------------------------RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 -22)------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -23)--------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_suppkey@2, s_suppkey@0)], projection=[l_orderkey@0, l_partkey@1, l_suppkey@2, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@7] -24)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -25)------------------------------------------------RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 -26)--------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -27)----------------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] -28)------------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -29)--------------------------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -30)----------------------------------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey] -31)------------------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -32)--------------------------------------------------------------FilterExec: p_name@1 LIKE %green% -33)----------------------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -34)------------------------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], has_header=false -35)------------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -36)--------------------------------------------------------RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 -37)----------------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount], has_header=false -38)----------------------------------------------CoalesceBatchesExec: target_batch_size=8192 -39)------------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -40)--------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -41)----------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -42)--------------------------------------CoalesceBatchesExec: target_batch_size=8192 
-43)----------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 -44)------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false -45)------------------------------CoalesceBatchesExec: target_batch_size=8192 -46)--------------------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 -47)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderdate], has_header=false -48)----------------------CoalesceBatchesExec: target_batch_size=8192 -49)------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -50)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -51)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +01)SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] +04)------AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] +08)--------------ProjectionExec: expr=[n_name@5 as nation, date_part(YEAR, o_orderdate@4) as o_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) - ps_supplycost@3 * l_quantity@0 as amount] +09)----------------CoalesceBatchesExec: target_batch_size=8192 +10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[l_quantity@0, l_extendedprice@1, l_discount@2, ps_supplycost@4, o_orderdate@5, n_name@7] +11)--------------------CoalesceBatchesExec: target_batch_size=8192 +12)----------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 +13)------------------------CoalesceBatchesExec: target_batch_size=8192 +14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_quantity@1, l_extendedprice@2, l_discount@3, s_nationkey@4, ps_supplycost@5, o_orderdate@7] +15)----------------------------CoalesceBatchesExec: target_batch_size=8192 +16)------------------------------RepartitionExec: 
partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +18)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_suppkey@2, ps_suppkey@1), (l_partkey@1, ps_partkey@0)], projection=[l_orderkey@0, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@9] +19)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +20)--------------------------------------RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 +21)----------------------------------------CoalesceBatchesExec: target_batch_size=8192 +22)------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_suppkey@2, s_suppkey@0)], projection=[l_orderkey@0, l_partkey@1, l_suppkey@2, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@7] +23)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +24)----------------------------------------------RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 +25)------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +26)--------------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] +27)----------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +28)------------------------------------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +29)--------------------------------------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey] +30)----------------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +31)------------------------------------------------------------FilterExec: p_name@1 LIKE %green% +32)--------------------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +33)----------------------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], has_header=false +34)----------------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +35)------------------------------------------------------RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 +36)--------------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount], has_header=false +37)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192 +38)----------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +39)------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 
+40)--------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +41)------------------------------------CoalesceBatchesExec: target_batch_size=8192 +42)--------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 +43)----------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false +44)----------------------------CoalesceBatchesExec: target_batch_size=8192 +45)------------------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 +46)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderdate], has_header=false +47)--------------------CoalesceBatchesExec: target_batch_size=8192 +48)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +49)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +50)--------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 288f99d82c107..e1fd5eb726f1d 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -396,26 +396,23 @@ query TT explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -03)----Union -04)------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 -05)--------TableScan: aggregate_test_100 projection=[c1, c9] -06)------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 -07)--------TableScan: aggregate_test_100 projection=[c1, c3] +01)Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 +02)--Union +03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 +04)------TableScan: aggregate_test_100 projection=[c1, c9] +05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 +06)------TableScan: aggregate_test_100 projection=[c1, c3] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [c9@1 DESC], fetch=5 -03)----LocalLimitExec: fetch=5 -04)------UnionExec -05)--------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] -06)----------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] 
-07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true -09)--------SortExec: expr=[c9@1 DESC], preserve_partitioning=[true] -10)----------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9] -11)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -12)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true +01)SortPreservingMergeExec: [c9@1 DESC], fetch=5 +02)--UnionExec +03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] +05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true +07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] +08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9] +09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true query TI SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5 @@ -460,6 +457,68 @@ physical_plan 14)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 15)----------------MemoryExec: partitions=1, partition_sizes=[1] +# Union with limit push down 3 children test case +query TT +EXPLAIN + SELECT count(*) as cnt FROM + (SELECT count(*), c1 + FROM aggregate_test_100 + WHERE c13 != 'C2GT5KVyOPZpgKVl110TyZO0NcJ434' + GROUP BY c1 + ORDER BY c1 + ) AS a + UNION ALL + SELECT 1 as cnt + UNION ALL + SELECT lead(c1, 1) OVER () as cnt FROM (select 1 as c1) AS b + LIMIT 3 +---- +logical_plan +01)Limit: skip=0, fetch=3 +02)--Union +03)----Projection: count(*) AS cnt +04)------Limit: skip=0, fetch=3 +05)--------Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] +06)----------SubqueryAlias: a +07)------------Projection: +08)--------------Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[]] +09)----------------Projection: aggregate_test_100.c1 +10)------------------Filter: aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434") +11)--------------------TableScan: aggregate_test_100 projection=[c1, c13], partial_filters=[aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434")] +12)----Projection: Int64(1) AS cnt +13)------Limit: skip=0, fetch=3 +14)--------EmptyRelation +15)----Projection: LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt +16)------Limit: skip=0, fetch=3 +17)--------WindowAggr: windowExpr=[[LEAD(b.c1, Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +18)----------SubqueryAlias: b +19)------------Projection: Int64(1) AS c1 +20)--------------EmptyRelation +physical_plan +01)GlobalLimitExec: skip=0, fetch=3 +02)--CoalescePartitionsExec +03)----UnionExec +04)------ProjectionExec: expr=[count(*)@0 as cnt] +05)--------AggregateExec: mode=Final, gby=[], aggr=[count(*)] +06)----------CoalescePartitionsExec +07)------------AggregateExec: mode=Partial, gby=[], aggr=[count(*)] +08)--------------ProjectionExec: expr=[] 
+09)----------------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[] +10)------------------CoalesceBatchesExec: target_batch_size=2 +11)--------------------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4 +12)----------------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[] +13)------------------------ProjectionExec: expr=[c1@0 as c1] +14)--------------------------CoalesceBatchesExec: target_batch_size=2 +15)----------------------------FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434 +16)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +17)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c13], has_header=true +18)------ProjectionExec: expr=[1 as cnt] +19)--------PlaceholderRowExec +20)------ProjectionExec: expr=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt] +21)--------BoundedWindowAggExec: wdw=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +22)----------ProjectionExec: expr=[1 as c1] +23)------------PlaceholderRowExec + ######## # Clean up after the test diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index ef6746730eb60..af882c3a404a7 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1768,11 +1768,10 @@ logical_plan 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] 03)----SubqueryAlias: a 04)------Projection: -05)--------Sort: aggregate_test_100.c1 ASC NULLS LAST -06)----------Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[]] -07)------------Projection: aggregate_test_100.c1 -08)--------------Filter: aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434") -09)----------------TableScan: aggregate_test_100 projection=[c1, c13], partial_filters=[aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434")] +05)--------Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[]] +06)----------Projection: aggregate_test_100.c1 +07)------------Filter: aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434") +08)--------------TableScan: aggregate_test_100 projection=[c1, c13], partial_filters=[aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434")] physical_plan 01)ProjectionExec: expr=[count(*)@0 as global_count] 02)--AggregateExec: mode=Final, gby=[], aggr=[count(*)] @@ -1815,27 +1814,24 @@ EXPLAIN SELECT c3, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 -03)----Projection: aggregate_test_100.c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum2 -04)------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY 
[aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -05)--------Projection: aggregate_test_100.c3, aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -06)----------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -07)------------TableScan: aggregate_test_100 projection=[c2, c3, c9] +01)Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum2 +03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c3, aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +06)----------TableScan: aggregate_test_100 projection=[c2, c3, c9] physical_plan -01)GlobalLimitExec: skip=0, fetch=5 -02)--SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 -03)----ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] -04)------LocalLimitExec: fetch=5 -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] -07)------------CoalesceBatchesExec: target_batch_size=4096 -08)--------------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 -09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -10)------------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, 
sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -11)--------------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -12)----------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST], preserve_partitioning=[false] -13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true +01)SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 +02)--ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] +05)--------CoalesceBatchesExec: target_batch_size=4096 +06)----------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 +07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +08)--------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +09)----------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +10)------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true @@ -2044,9 +2040,8 @@ EXPLAIN SELECT ARRAY_AGG(c13) as array_agg1 FROM (SELECT * FROM aggregate_test_1 logical_plan 01)Projection: array_agg(aggregate_test_100.c13) AS array_agg1 02)--Aggregate: groupBy=[[]], aggr=[[array_agg(aggregate_test_100.c13)]] -03)----Limit: skip=0, fetch=1 -04)------Sort: aggregate_test_100.c13 ASC NULLS LAST, fetch=1 -05)--------TableScan: aggregate_test_100 projection=[c13] +03)----Sort: aggregate_test_100.c13 ASC NULLS LAST, fetch=1 +04)------TableScan: aggregate_test_100 projection=[c13] physical_plan 01)ProjectionExec: expr=[array_agg(aggregate_test_100.c13)@0 as array_agg1] 02)--AggregateExec: mode=Final, gby=[], aggr=[array_agg(aggregate_test_100.c13)] @@ -2102,15 +2097,14 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: aggregate_test_100.c9 ASC NULLS LAST, fetch=5 -03)----Projection: aggregate_test_100.c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS sum4 -04)------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] -05)--------Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING -06)----------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] 
ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] -08)--------------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] -09)----------------TableScan: aggregate_test_100 projection=[c1, c2, c8, c9] +01)Sort: aggregate_test_100.c9 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS sum4 +03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] +04)------Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING +05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] +06)----------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] +07)------------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] +08)--------------TableScan: aggregate_test_100 projection=[c1, c2, c8, c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS 
LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] @@ -2164,9 +2158,8 @@ logical_plan 07)------------WindowAggr: windowExpr=[[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]] 08)--------------WindowAggr: windowExpr=[[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] 09)----------------SubqueryAlias: t1 -10)------------------Sort: aggregate_test_100.c9 ASC NULLS LAST -11)--------------------Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c8, aggregate_test_100.c9, aggregate_test_100.c1 AS c1_alias -12)----------------------TableScan: aggregate_test_100 projection=[c1, c2, c8, c9] +10)------------------Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c8, aggregate_test_100.c9, aggregate_test_100.c1 AS c1_alias +11)--------------------TableScan: aggregate_test_100 projection=[c1, c2, c8, c9] physical_plan 01)ProjectionExec: expr=[c9@1 as c9, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2206,13 +2199,12 @@ EXPLAIN SELECT SUM(c12) OVER(ORDER BY c1, c2 GROUPS BETWEEN 1 PRECEDING AND 1 FO ---- logical_plan 01)Projection: sum1, sum2 -02)--Limit: skip=0, fetch=5 -03)----Sort: aggregate_test_100.c9 ASC NULLS LAST, fetch=5 -04)------Projection: sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS sum1, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING AS sum2, aggregate_test_100.c9 -05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING]] -06)----------Projection: aggregate_test_100.c1, aggregate_test_100.c9, aggregate_test_100.c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING -07)------------WindowAggr: windowExpr=[[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING]] -08)--------------TableScan: 
aggregate_test_100 projection=[c1, c2, c9, c12] +02)--Sort: aggregate_test_100.c9 ASC NULLS LAST, fetch=5 +03)----Projection: sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS sum1, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING AS sum2, aggregate_test_100.c9 +04)------WindowAggr: windowExpr=[[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING]] +05)--------Projection: aggregate_test_100.c1, aggregate_test_100.c9, aggregate_test_100.c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING +06)----------WindowAggr: windowExpr=[[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING]] +07)------------TableScan: aggregate_test_100 projection=[c1, c2, c9, c12] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2] 02)--SortExec: TopK(fetch=5), expr=[c9@2 ASC NULLS LAST], preserve_partitioning=[false] @@ -2246,12 +2238,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, fetch=5 -03)----Sort: aggregate_test_100.c9 ASC NULLS LAST -04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2285,12 +2275,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY 
[aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2324,12 +2312,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 DESC NULLS FIRST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 DESC NULLS FIRST, fetch=5 +02)--Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] @@ -2366,12 +2352,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] @@ -2419,12 +2403,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 +02)--Projection: aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2443,12 +2425,10 @@ EXPLAIN SELECT c5, c9, rn1 FROM (SELECT c5, c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST, fetch=5 -03)----Sort: CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c5, aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c5, c9] +01)Sort: rn1 ASC NULLS LAST, CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST, fetch=5 +02)--Projection: aggregate_test_100.c5, aggregate_test_100.c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c5, c9] physical_plan 01)ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2466,12 +2446,10 @@ EXPLAIN SELECT c9, rn1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: rn1 ASC NULLS LAST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS Int64) AS rn1 -05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: rn1 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS Int64) AS rn1 +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)ProjectionExec: expr=[c9@0 as c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW@1 AS Int64) as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 @@ -2568,15 +2546,14 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: sum1, sum2, sum3, min1, min2, min3, max1, max2, max3, cnt1, cnt2, sumr1, sumr2, sumr3, minr1, minr2, minr3, maxr1, maxr2, maxr3, cntr1, cntr2, sum4, cnt3 -02)--Limit: skip=0, fetch=5 -03)----Sort: annotated_data_finite.inc_col DESC NULLS FIRST, fetch=5 -04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS min2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING AS sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING AS sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS maxr2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt3, annotated_data_finite.inc_col 
-05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] -06)----------Projection: __common_expr_1, annotated_data_finite.inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING -07)------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING 
AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] -08)--------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] -09)----------------Projection: CAST(annotated_data_finite.desc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS Int64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col, annotated_data_finite.desc_col -10)------------------TableScan: annotated_data_finite projection=[ts, inc_col, desc_col] +02)--Sort: annotated_data_finite.inc_col DESC NULLS FIRST, fetch=5 +03)----Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite.desc_col) ORDER 
BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS min2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING AS sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING AS sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING AS maxr2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING AS maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS cnt3, annotated_data_finite.inc_col +04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] +05)--------Projection: __common_expr_1, annotated_data_finite.inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, 
sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING +06)----------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] +07)------------WindowAggr: windowExpr=[[sum(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING AS count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]] +08)--------------Projection: CAST(annotated_data_finite.desc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS Int64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col, annotated_data_finite.desc_col +09)----------------TableScan: annotated_data_finite projection=[ts, inc_col, desc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, sum3@2 as sum3, min1@3 as min1, min2@4 as min2, min3@5 as min3, max1@6 as max1, max2@7 as max2, max3@8 as max3, cnt1@9 as cnt1, cnt2@10 as cnt2, sumr1@11 as sumr1, sumr2@12 as sumr2, sumr3@13 as sumr3, minr1@14 as minr1, minr2@15 as minr2, minr3@16 as minr3, maxr1@17 as maxr1, maxr2@18 as maxr2, maxr3@19 as maxr3, cntr1@20 as cntr1, cntr2@21 as cntr2, sum4@22 as sum4, cnt3@23 as cnt3] 02)--SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] @@ -2659,12 +2636,11 @@ EXPLAIN SELECT LIMIT 5; ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: annotated_data_finite.ts DESC NULLS FIRST, fetch=5 -03)----Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 
PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 -04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, 
first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] -05)--------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] -06)----------TableScan: annotated_data_finite projection=[ts, inc_col] +01)Sort: annotated_data_finite.ts DESC NULLS FIRST, fetch=5 +02)--Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 +03)----WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 
FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +05)--------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, 
last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] @@ -2730,13 +2706,12 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: sum1, sum2, min1, min2, max1, max2, count1, count2, avg1, avg2 -02)--Limit: skip=0, fetch=5 -03)----Sort: annotated_data_finite.inc_col ASC NULLS LAST, fetch=5 -04)------Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS 
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS avg2, annotated_data_finite.inc_col -05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------Projection: CAST(annotated_data_finite.inc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS Float64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col -08)--------------TableScan: annotated_data_finite projection=[ts, inc_col] +02)--Sort: annotated_data_finite.inc_col ASC NULLS LAST, fetch=5 +03)----Projection: sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC 
NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING AS avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS avg2, annotated_data_finite.inc_col +04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING]] +05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, avg(__common_expr_2 AS annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] +06)----------Projection: CAST(annotated_data_finite.inc_col AS Int64) AS __common_expr_1, CAST(annotated_data_finite.inc_col AS Float64) AS __common_expr_2, annotated_data_finite.ts, annotated_data_finite.inc_col +07)------------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as min2, max1@4 as max1, max2@5 as max2, count1@6 as count1, count2@7 as count2, avg1@8 as avg1, avg2@9 as avg2] 02)--SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] @@ -2784,12 +2759,11 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: first_value1, first_value2, last_value1, last_value2, nth_value1 -02)--Limit: 
skip=0, fetch=5 -03)----Sort: annotated_data_finite.inc_col ASC NULLS LAST, fetch=5 -04)------Projection: first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS first_value1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS first_value2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS last_value1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS last_value2, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS nth_value1, annotated_data_finite.inc_col -05)--------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------TableScan: annotated_data_finite projection=[ts, inc_col] +02)--Sort: annotated_data_finite.inc_col ASC NULLS LAST, fetch=5 +03)----Projection: first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS first_value1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS first_value2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS last_value1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS last_value2, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS nth_value1, annotated_data_finite.inc_col +04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] +05)--------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] 
+06)----------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[first_value1@0 as first_value1, first_value2@1 as first_value2, last_value1@2 as last_value1, last_value2@3 as last_value2, nth_value1@4 as nth_value1] 02)--SortExec: TopK(fetch=5), expr=[inc_col@5 ASC NULLS LAST], preserve_partitioning=[false] @@ -2829,13 +2803,12 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: sum1, sum2, count1, count2 -02)--Limit: skip=0, fetch=5 -03)----Sort: annotated_data_infinite.ts ASC NULLS LAST, fetch=5 -04)------Projection: sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, annotated_data_infinite.ts -05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------Projection: CAST(annotated_data_infinite.inc_col AS Int64) AS __common_expr_1, annotated_data_infinite.ts, annotated_data_infinite.inc_col -08)--------------TableScan: annotated_data_infinite projection=[ts, inc_col] +02)--Sort: annotated_data_infinite.ts ASC NULLS LAST, fetch=5 +03)----Projection: sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, annotated_data_infinite.ts +04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] +05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] +06)----------Projection: 
CAST(annotated_data_infinite.inc_col AS Int64) AS __common_expr_1, annotated_data_infinite.ts, annotated_data_infinite.inc_col +07)------------TableScan: annotated_data_infinite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] 02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] @@ -2876,13 +2849,12 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: sum1, sum2, count1, count2 -02)--Limit: skip=0, fetch=5 -03)----Sort: annotated_data_infinite.ts ASC NULLS LAST, fetch=5 -04)------Projection: sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, annotated_data_infinite.ts -05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------Projection: CAST(annotated_data_infinite.inc_col AS Int64) AS __common_expr_1, annotated_data_infinite.ts, annotated_data_infinite.inc_col -08)--------------TableScan: annotated_data_infinite projection=[ts, inc_col] +02)--Sort: annotated_data_infinite.ts ASC NULLS LAST, fetch=5 +03)----Projection: sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING AS count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING AS count2, annotated_data_infinite.ts +04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY 
[annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING]] +05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING]] +06)----------Projection: CAST(annotated_data_infinite.inc_col AS Int64) AS __common_expr_1, annotated_data_infinite.ts, annotated_data_infinite.inc_col +07)------------TableScan: annotated_data_infinite projection=[ts, inc_col] physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] 02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] @@ -3042,17 +3014,16 @@ EXPLAIN SELECT a, b, c, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: annotated_data_finite2.c ASC NULLS LAST, fetch=5 -03)----Projection: annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING AS sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING AS sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] 
ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW AS sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING AS sum12 -04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING]] -05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] -06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -07)------------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] -08)--------------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] -09)----------------WindowAggr: 
windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] -10)------------------Projection: CAST(annotated_data_finite2.c AS Int64) AS __common_expr_1, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d -11)--------------------TableScan: annotated_data_finite2 projection=[a, b, c, d] +01)Sort: annotated_data_finite2.c ASC NULLS LAST, fetch=5 +02)--Projection: annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING AS sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING AS sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING AS sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW AS sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING AS sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING AS sum12 +03)----WindowAggr: 
windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING]] +04)------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING]] +05)--------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +06)----------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING]] +07)------------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW]] +08)--------------WindowAggr: windowExpr=[[sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(__common_expr_1 AS annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING]] +09)----------------Projection: CAST(annotated_data_finite2.c AS Int64) AS __common_expr_1, annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.c, annotated_data_finite2.d +10)------------------TableScan: annotated_data_finite2 projection=[a, b, c, d] physical_plan 01)SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY 
[annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] @@ -3121,19 +3092,17 @@ EXPLAIN SELECT * FROM (SELECT *, ROW_NUMBER() OVER(ORDER BY a ASC) as rn1 ---- logical_plan 01)Sort: rn1 ASC NULLS LAST -02)--Filter: rn1 < UInt64(50) -03)----Limit: skip=0, fetch=5 -04)------Sort: rn1 ASC NULLS LAST, fetch=5 -05)--------Projection: annotated_data_infinite2.a0, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, annotated_data_infinite2.d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 -06)----------WindowAggr: windowExpr=[[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -07)------------TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] +02)--Sort: rn1 ASC NULLS LAST, fetch=5 +03)----Projection: annotated_data_infinite2.a0, annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.c, 
annotated_data_infinite2.d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 +04)------Filter: row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW < UInt64(50) +05)--------WindowAggr: windowExpr=[[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +06)----------TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)CoalesceBatchesExec: target_batch_size=4096 -02)--FilterExec: rn1@5 < 50 -03)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] -04)------GlobalLimitExec: skip=0, fetch=5 -05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] +01)ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] +02)--CoalesceBatchesExec: target_batch_size=4096, fetch=5 +03)----FilterExec: row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 < 50 +04)------BoundedWindowAggExec: wdw=[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] # this is a negative test for asserting that window functions (other than ROW_NUMBER) # are not added to ordering equivalence @@ -3147,12 +3116,10 @@ EXPLAIN SELECT c9, sum1 FROM (SELECT c9, LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: sum1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -03)----Sort: aggregate_test_100.c9 DESC NULLS FIRST -04)------Projection: aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum1 -05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c9] +01)Sort: sum1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 +02)--Projection: 
aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum1 +03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: aggregate_test_100 projection=[c9] physical_plan 01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST,c9@0 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] @@ -3332,13 +3299,12 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 -03)----Projection: aggregate_test_100.c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max1 -04)------WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -05)--------Projection: aggregate_test_100.c3, aggregate_test_100.c12, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -06)----------WindowAggr: windowExpr=[[min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] -07)------------TableScan: aggregate_test_100 projection=[c3, c11, c12] +01)Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 +02)--Projection: aggregate_test_100.c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS max1 +03)----WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------Projection: aggregate_test_100.c3, aggregate_test_100.c12, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +05)--------WindowAggr: windowExpr=[[min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +06)----------TableScan: aggregate_test_100 projection=[c3, c11, c12] physical_plan 01)SortExec: TopK(fetch=5), expr=[c3@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c3@0 as c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] @@ -3378,11 +3344,10 @@ EXPLAIN SELECT ---- logical_plan 01)Projection: min1, max1 -02)--Limit: skip=0, fetch=5 -03)----Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 -04)------Projection: max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1, 
aggregate_test_100.c3 -05)--------WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -06)----------TableScan: aggregate_test_100 projection=[c3, c12] +02)--Sort: aggregate_test_100.c3 ASC NULLS LAST, fetch=5 +03)----Projection: max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1, aggregate_test_100.c3 +04)------WindowAggr: windowExpr=[[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +05)--------TableScan: aggregate_test_100 projection=[c3, c12] physical_plan 01)ProjectionExec: expr=[min1@0 as min1, max1@1 as max1] 02)--SortExec: TopK(fetch=5), expr=[c3@2 ASC NULLS LAST], preserve_partitioning=[false] @@ -3576,11 +3541,10 @@ EXPLAIN SELECT c, NTH_VALUE(c, 2) OVER(order by c DESC) as nv1 LIMIT 5 ---- logical_plan -01)Limit: skip=0, fetch=5 -02)--Sort: multiple_ordered_table.c ASC NULLS LAST, fetch=5 -03)----Projection: multiple_ordered_table.c, NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS nv1 -04)------WindowAggr: windowExpr=[[NTH_VALUE(multiple_ordered_table.c, Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -05)--------TableScan: multiple_ordered_table projection=[c] +01)Sort: multiple_ordered_table.c ASC NULLS LAST, fetch=5 +02)--Projection: multiple_ordered_table.c, NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS nv1 +03)----WindowAggr: windowExpr=[[NTH_VALUE(multiple_ordered_table.c, Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: multiple_ordered_table projection=[c] physical_plan 01)ProjectionExec: expr=[c@0 as c, NTH_VALUE(multiple_ordered_table.c,Int64(2)) ORDER BY [multiple_ordered_table.c DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as nv1] 02)--GlobalLimitExec: skip=0, fetch=5 From c60b798cc0596c77127b728d6e8f08831c36ca1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Mon, 19 Aug 2024 18:49:01 +0300 Subject: [PATCH 336/357] Fix: Remove Unrelated Fields When Expanding Wildcards in Functional Dependency Projections (#12060) * Fix exprlist bug * Update datafusion/expr/src/utils.rs Co-authored-by: Mehmet Ozan Kabak * Update utils.rs * Update joins.slt --------- Co-authored-by: Mehmet Ozan Kabak --- datafusion/expr/src/utils.rs | 26 ++++++++- datafusion/sqllogictest/test_files/joins.slt | 61 ++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 4db5061e8fe7d..5f5c468fa2f59 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -838,16 +838,38 @@ pub fn exprlist_len( 
qualifier: Some(qualifier), options, } => { + let related_wildcard_schema = wildcard_schema.as_ref().map_or_else( + || Ok(Arc::clone(schema)), + |schema| { + // Eliminate the fields coming from other tables. + let qualified_fields = schema + .fields() + .iter() + .enumerate() + .filter_map(|(idx, field)| { + let (maybe_table_ref, _) = schema.qualified_field(idx); + if maybe_table_ref.map_or(true, |q| q == qualifier) { + Some((maybe_table_ref.cloned(), Arc::clone(field))) + } else { + None + } + }) + .collect::>(); + let metadata = schema.metadata().clone(); + DFSchema::new_with_metadata(qualified_fields, metadata) + .map(Arc::new) + }, + )?; let excluded = get_excluded_columns( options.exclude.as_ref(), options.except.as_ref(), - wildcard_schema.unwrap_or(schema), + related_wildcard_schema.as_ref(), Some(qualifier), )? .into_iter() .collect::>(); Ok( - get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) + get_exprs_except_skipped(related_wildcard_schema.as_ref(), excluded) .len(), ) } diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 9f036c387033f..7af145fe3e818 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4044,3 +4044,64 @@ physical_plan 03)----MemoryExec: partitions=1, partition_sizes=[1] 04)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] 05)------MemoryExec: partitions=1, partition_sizes=[1] + + +# Functional dependencies across a join +statement ok +CREATE TABLE sales_global ( + ts TIMESTAMP, + sn INTEGER, + amount INTEGER, + currency VARCHAR NOT NULL, + primary key(sn) +); + +statement ok +CREATE TABLE exchange_rates ( + ts TIMESTAMP, + sn INTEGER, + currency_from VARCHAR NOT NULL, + currency_to VARCHAR NOT NULL, + rate FLOAT, + primary key(sn) +); + +query TT +EXPLAIN SELECT s.*, s.amount * LAST_VALUE(e.rate) AS amount_usd +FROM sales_global AS s +JOIN exchange_rates AS e +ON s.currency = e.currency_from AND + e.currency_to = 'USD' AND + s.ts >= e.ts +GROUP BY s.sn +ORDER BY s.sn +---- +logical_plan +01)Sort: s.sn ASC NULLS LAST +02)--Projection: s.ts, s.sn, s.amount, s.currency, CAST(s.amount AS Float32) * last_value(e.rate) AS amount_usd +03)----Aggregate: groupBy=[[s.sn, s.ts, s.amount, s.currency]], aggr=[[last_value(e.rate)]] +04)------Projection: s.ts, s.sn, s.amount, s.currency, e.rate +05)--------Inner Join: s.currency = e.currency_from Filter: s.ts >= e.ts +06)----------SubqueryAlias: s +07)------------TableScan: sales_global projection=[ts, sn, amount, currency] +08)----------SubqueryAlias: e +09)------------Projection: exchange_rates.ts, exchange_rates.currency_from, exchange_rates.rate +10)--------------Filter: exchange_rates.currency_to = Utf8("USD") +11)----------------TableScan: exchange_rates projection=[ts, currency_from, currency_to, rate] +physical_plan +01)SortExec: expr=[sn@1 ASC NULLS LAST], preserve_partitioning=[false] +02)--ProjectionExec: expr=[ts@1 as ts, sn@0 as sn, amount@2 as amount, currency@3 as currency, CAST(amount@2 AS Float32) * last_value(e.rate)@4 as amount_usd] +03)----AggregateExec: mode=Single, gby=[sn@1 as sn, ts@0 as ts, amount@2 as amount, currency@3 as currency], aggr=[last_value(e.rate)] +04)------CoalesceBatchesExec: target_batch_size=3 +05)--------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(currency@3, currency_from@1)], filter=ts@0 >= ts@1, projection=[ts@0, sn@1, amount@2, currency@3, rate@6] +06)----------MemoryExec: partitions=1, 
partition_sizes=[0] +07)----------ProjectionExec: expr=[ts@0 as ts, currency_from@1 as currency_from, rate@3 as rate] +08)------------CoalesceBatchesExec: target_batch_size=3 +09)--------------FilterExec: currency_to@2 = USD +10)----------------MemoryExec: partitions=1, partition_sizes=[0] + +statement ok +DROP TABLE sales_global; + +statement ok +DROP TABLE exchange_rates; From a3c0c19b8aedcd0bcee55abcb1ebcd58e7c4cc7e Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 19 Aug 2024 17:59:18 +0200 Subject: [PATCH 337/357] Update async-trait in CLI and catalog crates (#12061) Update async-trait version in CLI and catalog crates and pin to the version defined in the root Cargo file where possible. --- datafusion-cli/Cargo.toml | 2 +- datafusion/catalog/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index cbd9ffd0febab..252d056e8b83f 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = { version = "52.2.0" } -async-trait = "0.1.41" +async-trait = "0.1.73" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index ff28d8e0c64a6..533bd1eeba08d 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -29,7 +29,7 @@ version.workspace = true [dependencies] arrow-schema = { workspace = true } -async-trait = "0.1.41" +async-trait = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } From b77917747fc24d45173d4aed323302f4fda77a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Tue, 20 Aug 2024 00:00:29 +0800 Subject: [PATCH 338/357] Improve doc (#12053) --- datafusion/expr-common/src/accumulator.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/accumulator.rs b/datafusion/expr-common/src/accumulator.rs index eac91c4f8efce..75335209451e1 100644 --- a/datafusion/expr-common/src/accumulator.rs +++ b/datafusion/expr-common/src/accumulator.rs @@ -117,8 +117,8 @@ pub trait Accumulator: Send + Sync + Debug { /// ┌─────────────────────────┐ ┌─────────────────────────┐ /// │ GroubyBy │ │ GroubyBy │ /// │(AggregateMode::Partial) │ │(AggregateMode::Partial) │ - /// └─────────────────────────┘ └────────────▲────────────┘ - /// ▲ │ + /// └─────────────────────────┘ └─────────────────────────┘ + /// ▲ ▲ /// │ │ update_batch() is called for /// │ │ each input RecordBatch /// .─────────. .─────────. @@ -185,15 +185,15 @@ pub trait Accumulator: Send + Sync + Debug { /// │(AggregateMode::Partial) │ │ (AggregateMode::Partial) │ the groups /// └─────────────────────────┘ └──────────────────────────┘ /// ▲ ▲ - /// │ ┌┘ - /// │ │ - /// .─────────. .─────────. - /// ,─' '─. ,─' '─. - /// ; Input : ; Input : 1. Since input data is - /// : Partition 0 ; : Partition 1 ; arbitrarily or RoundRobin - /// ╲ ╱ ╲ ╱ distributed, each partition - /// '─. ,─' '─. ,─' likely has all distinct - /// `───────' `───────' + /// │ │ + /// │ │ + /// .─────────. .─────────. + /// ,─' '─. ,─' '─. + /// ; Input : ; Input : 1. Since input data is + /// : Partition 0 ; : Partition 1 ; arbitrarily or RoundRobin + /// ╲ ╱ ╲ ╱ distributed, each partition + /// '─. ,─' '─. 
,─' likely has all distinct + /// `───────' `───────' /// ``` /// /// This structure is used so that the `AggregateMode::Partial` accumulators From 159ab179ffc33e6aae9bdb9f7bbeab1d0dd41187 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Tue, 20 Aug 2024 01:07:41 +0800 Subject: [PATCH 339/357] Minor: remove clones in `coerce_plan_expr_for_schema` (#12051) --- datafusion/expr/src/expr_rewriter/mod.rs | 12 +++++------- datafusion/expr/src/logical_plan/builder.rs | 2 +- datafusion/optimizer/src/analyzer/type_coercion.rs | 5 +++-- datafusion/optimizer/src/eliminate_nested_union.rs | 4 ++-- datafusion/optimizer/src/eliminate_one_union.rs | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 32e621350ee24..c26970cb053a1 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -207,27 +207,25 @@ pub fn strip_outer_reference(expr: Expr) -> Expr { /// Returns plan with expressions coerced to types compatible with /// schema types pub fn coerce_plan_expr_for_schema( - plan: &LogicalPlan, + plan: LogicalPlan, schema: &DFSchema, ) -> Result { match plan { // special case Projection to avoid adding multiple projections LogicalPlan::Projection(Projection { expr, input, .. }) => { - let new_exprs = - coerce_exprs_for_schema(expr.clone(), input.schema(), schema)?; - let projection = Projection::try_new(new_exprs, Arc::clone(input))?; + let new_exprs = coerce_exprs_for_schema(expr, input.schema(), schema)?; + let projection = Projection::try_new(new_exprs, input)?; Ok(LogicalPlan::Projection(projection)) } _ => { let exprs: Vec = plan.schema().iter().map(Expr::from).collect(); - let new_exprs = coerce_exprs_for_schema(exprs, plan.schema(), schema)?; let add_project = new_exprs.iter().any(|expr| expr.try_as_col().is_none()); if add_project { - let projection = Projection::try_new(new_exprs, Arc::new(plan.clone()))?; + let projection = Projection::try_new(new_exprs, Arc::new(plan))?; Ok(LogicalPlan::Projection(projection)) } else { - Ok(plan.clone()) + Ok(plan) } } } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index aa28c1c192429..a96caa03d6110 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -147,7 +147,7 @@ impl LogicalPlanBuilder { } // Ensure that the recursive term has the same field types as the static term let coerced_recursive_term = - coerce_plan_expr_for_schema(&recursive_term, self.plan.schema())?; + coerce_plan_expr_for_schema(recursive_term, self.plan.schema())?; Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term: Arc::new(self.plan.clone()), diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 40efbba6de7a5..7251a95d77d65 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -181,9 +181,10 @@ impl<'a> TypeCoercionRewriter<'a> { let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); let new_inputs = union_plan .inputs - .iter() + .into_iter() .map(|p| { - let plan = coerce_plan_expr_for_schema(p, &union_schema)?; + let plan = + coerce_plan_expr_for_schema(Arc::unwrap_or_clone(p), &union_schema)?; match plan { LogicalPlan::Projection(Projection { expr, input, .. 
}) => { Ok(Arc::new(project_with_column_index( diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 5f41e4f137b15..5d7895bba4d87 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -60,7 +60,7 @@ impl OptimizerRule for EliminateNestedUnion { let inputs = inputs .into_iter() .flat_map(extract_plans_from_union) - .map(|plan| coerce_plan_expr_for_schema(&plan, &schema)) + .map(|plan| coerce_plan_expr_for_schema(plan, &schema)) .collect::>>()?; Ok(Transformed::yes(LogicalPlan::Union(Union { @@ -75,7 +75,7 @@ impl OptimizerRule for EliminateNestedUnion { .into_iter() .map(extract_plan_from_distinct) .flat_map(extract_plans_from_union) - .map(|plan| coerce_plan_expr_for_schema(&plan, &schema)) + .map(|plan| coerce_plan_expr_for_schema(plan, &schema)) .collect::>>()?; Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( diff --git a/datafusion/optimizer/src/eliminate_one_union.rs b/datafusion/optimizer/src/eliminate_one_union.rs index 5e37b8cf7c1fa..43024107c4f81 100644 --- a/datafusion/optimizer/src/eliminate_one_union.rs +++ b/datafusion/optimizer/src/eliminate_one_union.rs @@ -107,7 +107,7 @@ mod tests { #[test] fn eliminate_one_union() -> Result<()> { let table_plan = coerce_plan_expr_for_schema( - &table_scan(Some("table"), &schema(), None)?.build()?, + table_scan(Some("table"), &schema(), None)?.build()?, &schema().to_dfschema()?, )?; let schema = Arc::clone(table_plan.schema()); From 7c5a8ebc325bf14a38b2a713b60334794ff9469c Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Mon, 19 Aug 2024 20:26:27 +0300 Subject: [PATCH 340/357] fix: support min/max for Float16 type (#12050) * fix: support min/max for Float16 type * minor: uncomment arrow_typeof float16 in sqllocigtests --- datafusion-cli/Cargo.lock | 1 + datafusion/functions-aggregate/Cargo.toml | 1 + datafusion/functions-aggregate/src/min_max.rs | 34 ++++++++++++++----- .../sqllogictest/test_files/aggregate.slt | 28 +++++++++++++++ .../sqllogictest/test_files/arrow_typeof.slt | 16 ++++----- 5 files changed, 62 insertions(+), 18 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 52e4a000355d7..b5637f785fb2d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1340,6 +1340,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 636b2e42d236c..d78f68a2604e7 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -47,6 +47,7 @@ datafusion-expr = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } +half = { workspace = true } log = { workspace = true } paste = "1.0.14" sqlparser = { workspace = true } diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 4dcd5ac0e9515..961e8639604c8 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -34,18 +34,19 @@ use arrow::array::{ ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int16Array, 
Int32Array, - Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, - IntervalYearMonthArray, LargeBinaryArray, LargeStringArray, StringArray, - StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, + Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, + IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, + LargeStringArray, StringArray, StringViewArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; use arrow::compute; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Float32Type, Float64Type, Int16Type, - Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + DataType, Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, + Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, + UInt8Type, }; use arrow_schema::IntervalUnit; use datafusion_common::{ @@ -66,6 +67,7 @@ use datafusion_expr::GroupsAccumulator; use datafusion_expr::{ function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Signature, Volatility, }; +use half::f16; use std::ops::Deref; fn get_min_max_result_type(input_types: &[DataType]) -> Result> { @@ -181,6 +183,7 @@ impl AggregateUDFImpl for Max { | UInt16 | UInt32 | UInt64 + | Float16 | Float32 | Float64 | Decimal128(_, _) @@ -209,6 +212,9 @@ impl AggregateUDFImpl for Max { UInt16 => instantiate_max_accumulator!(data_type, u16, UInt16Type), UInt32 => instantiate_max_accumulator!(data_type, u32, UInt32Type), UInt64 => instantiate_max_accumulator!(data_type, u64, UInt64Type), + Float16 => { + instantiate_max_accumulator!(data_type, f16, Float16Type) + } Float32 => { instantiate_max_accumulator!(data_type, f32, Float32Type) } @@ -339,6 +345,9 @@ macro_rules! min_max_batch { DataType::Float32 => { typed_min_max_batch!($VALUES, Float32Array, Float32, $OP) } + DataType::Float16 => { + typed_min_max_batch!($VALUES, Float16Array, Float16, $OP) + } DataType::Int64 => typed_min_max_batch!($VALUES, Int64Array, Int64, $OP), DataType::Int32 => typed_min_max_batch!($VALUES, Int32Array, Int32, $OP), DataType::Int16 => typed_min_max_batch!($VALUES, Int16Array, Int16, $OP), @@ -623,6 +632,9 @@ macro_rules! 
min_max { (ScalarValue::Float32(lhs), ScalarValue::Float32(rhs)) => { typed_min_max_float!(lhs, rhs, Float32, $OP) } + (ScalarValue::Float16(lhs), ScalarValue::Float16(rhs)) => { + typed_min_max_float!(lhs, rhs, Float16, $OP) + } (ScalarValue::UInt64(lhs), ScalarValue::UInt64(rhs)) => { typed_min_max!(lhs, rhs, UInt64, $OP) } @@ -950,6 +962,7 @@ impl AggregateUDFImpl for Min { | UInt16 | UInt32 | UInt64 + | Float16 | Float32 | Float64 | Decimal128(_, _) @@ -978,6 +991,9 @@ impl AggregateUDFImpl for Min { UInt16 => instantiate_min_accumulator!(data_type, u16, UInt16Type), UInt32 => instantiate_min_accumulator!(data_type, u32, UInt32Type), UInt64 => instantiate_min_accumulator!(data_type, u64, UInt64Type), + Float16 => { + instantiate_min_accumulator!(data_type, f16, Float16Type) + } Float32 => { instantiate_min_accumulator!(data_type, f32, Float32Type) } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index ce4d16bb69383..b8b93b28aff61 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5642,3 +5642,31 @@ query I??III?T select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(NULL), nth_value(NULL, 1), string_agg(NULL, ','); ---- 0 NULL NULL NULL NULL NULL NULL NULL + +# test min/max Float16 without group expression +query RRTT +WITH data AS ( + SELECT arrow_cast(1, 'Float16') AS f + UNION ALL + SELECT arrow_cast(6, 'Float16') AS f +) +SELECT MIN(f), MAX(f), arrow_typeof(MIN(f)), arrow_typeof(MAX(f)) FROM data; +---- +1 6 Float16 Float16 + +# test min/max Float16 with group expression +query IRRTT +WITH data AS ( + SELECT 1 as k, arrow_cast(1.8125, 'Float16') AS f + UNION ALL + SELECT 1 as k, arrow_cast(6.8007813, 'Float16') AS f + UNION ALL + SELECT 2 AS k, arrow_cast(8.5, 'Float16') AS f +) +SELECT k, MIN(f), MAX(f), arrow_typeof(MIN(f)), arrow_typeof(MAX(f)) +FROM data +GROUP BY k +ORDER BY k; +---- +1 1.8125 6.8007813 Float16 Float16 +2 8.5 8.5 Float16 Float16 diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 448706744305a..d1f49838f9322 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -102,7 +102,7 @@ query error Error unrecognized word: unknown SELECT arrow_cast('1', 'unknown') # Round Trip tests: -query TTTTTTTTTTTTTTTTTTTTTTT +query TTTTTTTTTTTTTTTTTTTTTTTT SELECT arrow_typeof(arrow_cast(1, 'Int8')) as col_i8, arrow_typeof(arrow_cast(1, 'Int16')) as col_i16, @@ -112,8 +112,7 @@ SELECT arrow_typeof(arrow_cast(1, 'UInt16')) as col_u16, arrow_typeof(arrow_cast(1, 'UInt32')) as col_u32, arrow_typeof(arrow_cast(1, 'UInt64')) as col_u64, - -- can't seem to cast to Float16 for some reason - -- arrow_typeof(arrow_cast(1, 'Float16')) as col_f16, + arrow_typeof(arrow_cast(1, 'Float16')) as col_f16, arrow_typeof(arrow_cast(1, 'Float32')) as col_f32, arrow_typeof(arrow_cast(1, 'Float64')) as col_f64, arrow_typeof(arrow_cast('foo', 'Utf8')) as col_utf8, @@ -130,7 +129,7 @@ SELECT arrow_typeof(arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, Some("+08:00"))')) as col_tstz_ns, arrow_typeof(arrow_cast('foo', 'Dictionary(Int32, Utf8)')) as col_dict ---- -Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Utf8 LargeUtf8 Binary LargeBinary Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None) 
Timestamp(Second, Some("+08:00")) Timestamp(Millisecond, Some("+08:00")) Timestamp(Microsecond, Some("+08:00")) Timestamp(Nanosecond, Some("+08:00")) Dictionary(Int32, Utf8) +Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float16 Float32 Float64 Utf8 LargeUtf8 Binary LargeBinary Timestamp(Second, None) Timestamp(Millisecond, None) Timestamp(Microsecond, None) Timestamp(Nanosecond, None) Timestamp(Second, Some("+08:00")) Timestamp(Millisecond, Some("+08:00")) Timestamp(Microsecond, Some("+08:00")) Timestamp(Nanosecond, Some("+08:00")) Dictionary(Int32, Utf8) @@ -147,15 +146,14 @@ create table foo as select arrow_cast(1, 'UInt16') as col_u16, arrow_cast(1, 'UInt32') as col_u32, arrow_cast(1, 'UInt64') as col_u64, - -- can't seem to cast to Float16 for some reason - -- arrow_cast(1.0, 'Float16') as col_f16, + arrow_cast(1.0, 'Float16') as col_f16, arrow_cast(1.0, 'Float32') as col_f32, arrow_cast(1.0, 'Float64') as col_f64 ; ## Ensure each column in the table has the expected type -query TTTTTTTTTT +query TTTTTTTTTTT SELECT arrow_typeof(col_i8), arrow_typeof(col_i16), @@ -165,12 +163,12 @@ SELECT arrow_typeof(col_u16), arrow_typeof(col_u32), arrow_typeof(col_u64), - -- arrow_typeof(col_f16), + arrow_typeof(col_f16), arrow_typeof(col_f32), arrow_typeof(col_f64) FROM foo; ---- -Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 +Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float16 Float32 Float64 statement ok From bd482629dc46ecdd88c3b9e4c03beeb476caa376 Mon Sep 17 00:00:00 2001 From: iamthinh Date: Mon, 19 Aug 2024 10:26:51 -0700 Subject: [PATCH 341/357] implement utf8_view for replace (#12004) * draft implement utf8_view for replace * add function signature * Add sql test * move macro util to replace function * remove unused import * rust format * change return type from utf8view to utf8 --------- Co-authored-by: Andrew Lamb --- datafusion/functions/src/string/replace.rs | 86 ++++++++++++++++++- .../sqllogictest/test_files/functions.slt | 10 +++ .../sqllogictest/test_files/string_view.slt | 19 ++-- 3 files changed, 106 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 4cebbba839fa0..13fa3d55672dd 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -18,10 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; @@ -45,7 +45,11 @@ impl ReplaceFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8, Utf8])], + vec![ + Exact(vec![Utf8View, Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), + ], Volatility::Immutable, ), } @@ -73,6 +77,7 @@ impl ScalarUDFImpl for ReplaceFunc { match args[0].data_type() { DataType::Utf8 => make_scalar_function(replace::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(replace::, vec![])(args), + DataType::Utf8View => make_scalar_function(replace_view, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function replace") } @@ -80,6 +85,23 @@ impl 
ScalarUDFImpl for ReplaceFunc { } } +fn replace_view(args: &[ArrayRef]) -> Result { + let string_array = as_string_view_array(&args[0])?; + let from_array = as_string_view_array(&args[1])?; + let to_array = as_string_view_array(&args[2])?; + + let result = string_array + .iter() + .zip(from_array.iter()) + .zip(to_array.iter()) + .map(|((string, from), to)| match (string, from, to) { + (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), + _ => None, + }) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} /// Replaces all occurrences in string of substring from with substring to. /// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef' fn replace(args: &[ArrayRef]) -> Result { @@ -100,4 +122,60 @@ fn replace(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } -mod test {} +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::test::test_function; + use arrow::array::Array; + use arrow::array::LargeStringArray; + use arrow::array::StringArray; + use arrow::datatypes::DataType::{LargeUtf8, Utf8}; + use datafusion_common::ScalarValue; + #[test] + fn test_functions() -> Result<()> { + test_function!( + ReplaceFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("aabbdqcbb")))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("bb")))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("ccc")))), + ], + Ok(Some("aacccdqcccc")), + &str, + Utf8, + StringArray + ); + + test_function!( + ReplaceFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from( + "aabbb" + )))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("bbb")))), + ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(String::from("cc")))), + ], + Ok(Some("aacc")), + &str, + LargeUtf8, + LargeStringArray + ); + + test_function!( + ReplaceFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from( + "aabbbcw" + )))), + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("bb")))), + ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("cc")))), + ], + Ok(Some("aaccbcw")), + &str, + Utf8, + StringArray + ); + + Ok(()) + } +} diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 074e4ef834816..b8519a463637f 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -826,6 +826,16 @@ SELECT replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'hello') ---- foohello +query T +SELECT replace(arrow_cast('foobar', 'Utf8View'), arrow_cast('bar', 'Utf8View'), arrow_cast('hello', 'Utf8View')) +---- +foohello + +query T +SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8'), arrow_cast('hello', 'LargeUtf8')) +---- +foohello + query T SELECT rtrim(' foo ') ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 82a714a432baa..0b441bcbeb8fe 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -925,7 +925,6 @@ logical_plan 01)Projection: regexp_replace(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$"), Utf8("\1")) AS k 02)--TableScan: test projection=[column1_utf8view] - ## Ensure no casts for REPEAT query TT EXPLAIN SELECT @@ -937,7 +936,6 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for REPLACE -## TODO file ticket query TT 
EXPLAIN SELECT REPLACE(column1_utf8view, 'foo', 'bar') as c1, @@ -945,9 +943,20 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: replace(__common_expr_1, Utf8("foo"), Utf8("bar")) AS c1, replace(__common_expr_1, CAST(test.column2_utf8view AS Utf8), Utf8("bar")) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: replace(test.column1_utf8view, Utf8View("foo"), Utf8View("bar")) AS c1, replace(test.column1_utf8view, test.column2_utf8view, Utf8View("bar")) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] + +query TT +SELECT + REPLACE(column1_utf8view, 'foo', 'bar') as c1, + REPLACE(column1_utf8view, column2_utf8view, 'bar') as c2 +FROM test; +---- +Andrew Andrew +Xiangpeng bar +Raphael baraphael +NULL NULL + ## Ensure no casts for REVERSE query TT From c2cbba26dfebf40744c01fd36e1ea1598795208e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 14:26:23 -0400 Subject: [PATCH 342/357] Fix CI check when version changes -- remove checked in file that is created by doc example (#12034) * Remove checked in file that is created by doc example * revert changes to .gitignore * Update example to use tmpdir --- datafusion/core/example.parquet | Bin 976 -> 0 bytes .../library-user-guide/using-the-dataframe-api.md | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 datafusion/core/example.parquet diff --git a/datafusion/core/example.parquet b/datafusion/core/example.parquet deleted file mode 100644 index 17f7473cd221426b545a5f437c42efdc6b1702b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 976 zcmb7@KTE?v7{=dR+8CijE50u|xIqLD2dOP6WGIC$#i2uS>LzKesGxgv#0ocst5 zj)D#j4jueV4hr5iyd@OJUQyG|kINUSZ90Srul~X4prKeb-oea*0Ww9&+pIQt{(x9GzT^&IXqo*ZtwF W->a4@ Result<()> { // read example.csv file into a DataFrame let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; // stream the contents of the DataFrame to the `example.parquet` file + let target_path = tempfile::tempdir()?.path().join("example.parquet"); df.write_parquet( - "example.parquet", + target_path.to_str().unwrap(), DataFrameWriteOptions::new(), None, // writer_options ).await; From 67cf1d606b901eaf591f44389702e285557d39e1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 21:31:32 -0400 Subject: [PATCH 343/357] Minor: update sqllogictest to treat Utf8View as text (#12033) * Minor: update sqllogictest to treat Utf8View as text * fmt --- .../sqllogictest/src/engines/datafusion_engine/normalize.rs | 4 +++- datafusion/sqllogictest/test_files/arrow_typeof.slt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 66ffeadf8cec8..b6b583b9fbdb2 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -267,7 +267,9 @@ pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec { | DataType::Float64 | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => DFColumnType::Float, - DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + DFColumnType::Text + } DataType::Date32 | DataType::Date64 | DataType::Time32(_) diff --git 
a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index d1f49838f9322..b9ceb5bf05f10 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -422,7 +422,7 @@ select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); [1, 2, 3] # Tests for Utf8View -query ?T +query TT select arrow_cast('MyAwesomeString', 'Utf8View'), arrow_typeof(arrow_cast('MyAwesomeString', 'Utf8View')) ---- MyAwesomeString Utf8View From 37e54ee874e9027a329ba4f6c1e0e6359d63a33c Mon Sep 17 00:00:00 2001 From: FANNG Date: Tue, 20 Aug 2024 15:28:30 +0800 Subject: [PATCH 344/357] [MINOR] correct document mistakes (#12068) * mistake * mistake --- datafusion/catalog/src/catalog.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/catalog/src/catalog.rs b/datafusion/catalog/src/catalog.rs index 026c3c008f59f..9ee94e8f1fc33 100644 --- a/datafusion/catalog/src/catalog.rs +++ b/datafusion/catalog/src/catalog.rs @@ -34,7 +34,7 @@ use datafusion_common::Result; /// * [`CatalogProviderList`]: a collection of `CatalogProvider`s /// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems) /// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) -/// * [`TableProvider]`: individual tables +/// * [`TableProvider`]: individual tables /// /// # Implementing Catalogs /// @@ -99,7 +99,7 @@ use datafusion_common::Result; /// [delta-rs]: https://github.com/delta-io/delta-rs /// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123 /// -/// [`TableProvider]: crate::datasource::TableProvider +/// [`TableProvider`]: crate::TableProvider pub trait CatalogProvider: Sync + Send { /// Returns the catalog provider as [`Any`] From 6e34280b2bd475af51b14d88dc1a1b0867fc877d Mon Sep 17 00:00:00 2001 From: Alexander Alexandrov Date: Tue, 20 Aug 2024 20:02:30 +0300 Subject: [PATCH 345/357] Plan `LATERAL` subqueries (#11456) * Planner: support `LATERAL` subqueries * Planner: use `DFSchema::merge` in `create_relation_subquery` In order to compute the `set_outer_from_schema` argument we currently use `DFSchema::join`. When we combine the current outer FROM schema with the current outer query schema columns from the latter should override columns from the first, so the correct way is to use `DFSchema::merge`. To witness the fix, note that the query in the fixed test case isn't planned as expected without the accompanying changes. * Update plans --------- Co-authored-by: Andrew Lamb --- datafusion/sql/src/planner.rs | 27 +++++ datafusion/sql/src/relation/join.rs | 49 ++++++++- datafusion/sql/src/relation/mod.rs | 51 +++++++++ datafusion/sql/src/select.rs | 31 ++++-- datafusion/sql/tests/sql_integration.rs | 108 +++++++++++++++++++ datafusion/sqllogictest/test_files/joins.slt | 48 +++++++++ 6 files changed, 300 insertions(+), 14 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 9ad515087a364..5cbe1d7c014ad 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -135,6 +135,9 @@ pub struct PlannerContext { ctes: HashMap>, /// The query schema of the outer query plan, used to resolve the columns in subquery outer_query_schema: Option, + /// The joined schemas of all FROM clauses planned so far. 
When planning LATERAL + /// FROM clauses, this should become a suffix of the `outer_query_schema`. + outer_from_schema: Option, } impl Default for PlannerContext { @@ -150,6 +153,7 @@ impl PlannerContext { prepare_param_data_types: Arc::new(vec![]), ctes: HashMap::new(), outer_query_schema: None, + outer_from_schema: None, } } @@ -177,6 +181,29 @@ impl PlannerContext { schema } + // return a clone of the outer FROM schema + pub fn outer_from_schema(&self) -> Option> { + self.outer_from_schema.clone() + } + + /// sets the outer FROM schema, returning the existing one, if any + pub fn set_outer_from_schema( + &mut self, + mut schema: Option, + ) -> Option { + std::mem::swap(&mut self.outer_from_schema, &mut schema); + schema + } + + /// extends the FROM schema, returning the existing one, if any + pub fn extend_outer_from_schema(&mut self, schema: &DFSchemaRef) -> Result<()> { + self.outer_from_schema = match self.outer_from_schema.as_ref() { + Some(from_schema) => Some(Arc::new(from_schema.join(schema)?)), + None => Some(Arc::clone(schema)), + }; + Ok(()) + } + /// Return the types of parameters (`$1`, `$2`, etc) if known pub fn prepare_param_data_types(&self) -> &[DataType] { &self.prepare_param_data_types diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index fb1d00b7e48a5..409533a3eaa58 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -18,7 +18,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{not_impl_err, Column, Result}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; -use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins}; +use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableFactor, TableWithJoins}; use std::collections::HashSet; impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -27,10 +27,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { t: TableWithJoins, planner_context: &mut PlannerContext, ) -> Result { - let mut left = self.create_relation(t.relation, planner_context)?; - for join in t.joins.into_iter() { + let mut left = if is_lateral(&t.relation) { + self.create_relation_subquery(t.relation, planner_context)? + } else { + self.create_relation(t.relation, planner_context)? + }; + let old_outer_from_schema = planner_context.outer_from_schema(); + for join in t.joins { + planner_context.extend_outer_from_schema(left.schema())?; left = self.parse_relation_join(left, join, planner_context)?; } + planner_context.set_outer_from_schema(old_outer_from_schema); Ok(left) } @@ -40,7 +47,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { join: Join, planner_context: &mut PlannerContext, ) -> Result { - let right = self.create_relation(join.relation, planner_context)?; + let right = if is_lateral_join(&join)? { + self.create_relation_subquery(join.relation, planner_context)? + } else { + self.create_relation(join.relation, planner_context)? + }; match join.join_operator { JoinOperator::LeftOuter(constraint) => { self.parse_join(left, right, constraint, JoinType::Left, planner_context) @@ -144,3 +155,33 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } } + +/// Return `true` iff the given [`TableFactor`] is lateral. +pub(crate) fn is_lateral(factor: &TableFactor) -> bool { + match factor { + TableFactor::Derived { lateral, .. } => *lateral, + TableFactor::Function { lateral, .. } => *lateral, + _ => false, + } +} + +/// Return `true` iff the given [`Join`] is lateral. 
+pub(crate) fn is_lateral_join(join: &Join) -> Result { + let is_lateral_syntax = is_lateral(&join.relation); + let is_apply_syntax = match join.join_operator { + JoinOperator::FullOuter(..) + | JoinOperator::RightOuter(..) + | JoinOperator::RightAnti(..) + | JoinOperator::RightSemi(..) + if is_lateral_syntax => + { + return not_impl_err!( + "LATERAL syntax is not supported for \ + FULL OUTER and RIGHT [OUTER | ANTI | SEMI] joins" + ); + } + JoinOperator::CrossApply | JoinOperator::OuterApply => true, + _ => false, + }; + Ok(is_lateral_syntax || is_apply_syntax) +} diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index c5fe180c23025..86e49780724b2 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -15,11 +15,15 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{not_impl_err, plan_err, DFSchema, Result, TableReference}; +use datafusion_expr::builder::subquery_alias; use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_expr::{Subquery, SubqueryAlias}; use sqlparser::ast::{FunctionArg, FunctionArgExpr, TableFactor}; mod join; @@ -153,6 +157,53 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(optimized_plan) } } + + pub(crate) fn create_relation_subquery( + &self, + subquery: TableFactor, + planner_context: &mut PlannerContext, + ) -> Result { + // At this point for a syntacitally valid query the outer_from_schema is + // guaranteed to be set, so the `.unwrap()` call will never panic. This + // is the case because we only call this method for lateral table + // factors, and those can never be the first factor in a FROM list. This + // means we arrived here through the `for` loop in `plan_from_tables` or + // the `for` loop in `plan_table_with_joins`. + let old_from_schema = planner_context + .set_outer_from_schema(None) + .unwrap_or_else(|| Arc::new(DFSchema::empty())); + let new_query_schema = match planner_context.outer_query_schema() { + Some(old_query_schema) => { + let mut new_query_schema = old_from_schema.as_ref().clone(); + new_query_schema.merge(old_query_schema); + Some(Arc::new(new_query_schema)) + } + None => Some(Arc::clone(&old_from_schema)), + }; + let old_query_schema = planner_context.set_outer_query_schema(new_query_schema); + + let plan = self.create_relation(subquery, planner_context)?; + let outer_ref_columns = plan.all_out_ref_exprs(); + + planner_context.set_outer_query_schema(old_query_schema); + planner_context.set_outer_from_schema(Some(old_from_schema)); + + match plan { + LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. 
}) => { + subquery_alias( + LogicalPlan::Subquery(Subquery { + subquery: input, + outer_ref_columns, + }), + alias, + ) + } + plan => Ok(LogicalPlan::Subquery(Subquery { + subquery: Arc::new(plan), + outer_ref_columns, + })), + } + } } fn optimize_subquery_sort(plan: LogicalPlan) -> Result> { diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 339234d9965ca..f42dec40149ff 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -496,19 +496,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match from.len() { 0 => Ok(LogicalPlanBuilder::empty(true).build()?), 1 => { - let from = from.remove(0); - self.plan_table_with_joins(from, planner_context) + let input = from.remove(0); + self.plan_table_with_joins(input, planner_context) } _ => { - let mut plans = from - .into_iter() - .map(|t| self.plan_table_with_joins(t, planner_context)); - - let mut left = LogicalPlanBuilder::from(plans.next().unwrap()?); - - for right in plans { - left = left.cross_join(right?)?; + let mut from = from.into_iter(); + + let mut left = LogicalPlanBuilder::from({ + let input = from.next().unwrap(); + self.plan_table_with_joins(input, planner_context)? + }); + let old_outer_from_schema = { + let left_schema = Some(Arc::clone(left.schema())); + planner_context.set_outer_from_schema(left_schema) + }; + for input in from { + // Join `input` with the current result (`left`). + let right = self.plan_table_with_joins(input, planner_context)?; + left = left.cross_join(right)?; + // Update the outer FROM schema. + let left_schema = Some(Arc::clone(left.schema())); + planner_context.set_outer_from_schema(left_schema); } + planner_context.set_outer_from_schema(old_outer_from_schema); + Ok(left.build()?) } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 7ce3565fa29f6..5685e09c9c9fb 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3103,6 +3103,114 @@ fn join_on_complex_condition() { quick_test(sql, expected); } +#[test] +fn lateral_constant() { + let sql = "SELECT * FROM j1, LATERAL (SELECT 1) AS j2"; + let expected = "Projection: *\ + \n CrossJoin:\ + \n TableScan: j1\ + \n SubqueryAlias: j2\ + \n Subquery:\ + \n Projection: Int64(1)\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn lateral_comma_join() { + let sql = "SELECT j1_string, j2_string FROM + j1, \ + LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2"; + let expected = "Projection: j1.j1_string, j2.j2_string\ + \n CrossJoin:\ + \n TableScan: j1\ + \n SubqueryAlias: j2\ + \n Subquery:\ + \n Projection: *\ + \n Filter: outer_ref(j1.j1_id) < j2.j2_id\ + \n TableScan: j2"; + quick_test(sql, expected); +} + +#[test] +fn lateral_comma_join_referencing_join_rhs() { + let sql = "SELECT * FROM\ + \n j1 JOIN (j2 JOIN j3 ON(j2_id = j3_id - 2)) ON(j1_id = j2_id),\ + \n LATERAL (SELECT * FROM j3 WHERE j3_string = j2_string) as j4;"; + let expected = "Projection: *\ + \n CrossJoin:\ + \n Inner Join: Filter: j1.j1_id = j2.j2_id\ + \n TableScan: j1\ + \n Inner Join: Filter: j2.j2_id = j3.j3_id - Int64(2)\ + \n TableScan: j2\ + \n TableScan: j3\ + \n SubqueryAlias: j4\ + \n Subquery:\ + \n Projection: *\ + \n Filter: j3.j3_string = outer_ref(j2.j2_string)\ + \n TableScan: j3"; + quick_test(sql, expected); +} + +#[test] +fn lateral_comma_join_with_shadowing() { + // The j1_id on line 3 references the (closest) j1 definition from line 2. 
+ let sql = "\ + SELECT * FROM j1, LATERAL (\ + SELECT * FROM j1, LATERAL (\ + SELECT * FROM j2 WHERE j1_id = j2_id\ + ) as j2\ + ) as j2;"; + let expected = "Projection: *\ + \n CrossJoin:\ + \n TableScan: j1\ + \n SubqueryAlias: j2\ + \n Subquery:\ + \n Projection: *\ + \n CrossJoin:\ + \n TableScan: j1\ + \n SubqueryAlias: j2\ + \n Subquery:\ + \n Projection: *\ + \n Filter: outer_ref(j1.j1_id) = j2.j2_id\ + \n TableScan: j2"; + quick_test(sql, expected); +} + +#[test] +fn lateral_left_join() { + let sql = "SELECT j1_string, j2_string FROM \ + j1 \ + LEFT JOIN LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2 ON(true);"; + let expected = "Projection: j1.j1_string, j2.j2_string\ + \n Left Join: Filter: Boolean(true)\ + \n TableScan: j1\ + \n SubqueryAlias: j2\ + \n Subquery:\ + \n Projection: *\ + \n Filter: outer_ref(j1.j1_id) < j2.j2_id\ + \n TableScan: j2"; + quick_test(sql, expected); +} + +#[test] +fn lateral_nested_left_join() { + let sql = "SELECT * FROM + j1, \ + (j2 LEFT JOIN LATERAL (SELECT * FROM j3 WHERE j1_id + j2_id = j3_id) AS j3 ON(true))"; + let expected = "Projection: *\ + \n CrossJoin:\ + \n TableScan: j1\ + \n Left Join: Filter: Boolean(true)\ + \n TableScan: j2\ + \n SubqueryAlias: j3\ + \n Subquery:\ + \n Projection: *\ + \n Filter: outer_ref(j1.j1_id) + outer_ref(j2.j2_id) = j3.j3_id\ + \n TableScan: j3"; + quick_test(sql, expected); +} + #[test] fn hive_aggregate_with_filter() -> Result<()> { let dialect = &HiveDialect {}; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 7af145fe3e818..0ef745a6b8e65 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4046,6 +4046,54 @@ physical_plan 05)------MemoryExec: partitions=1, partition_sizes=[1] +# Test CROSS JOIN LATERAL syntax (planning) +query TT +explain select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i); +---- +logical_plan +01)CrossJoin: +02)--SubqueryAlias: t1 +03)----TableScan: join_t1 projection=[t1_id, t1_name] +04)--SubqueryAlias: series +05)----Subquery: +06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) AS i +07)--------Unnest: lists[UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int)))] structs[] +08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) +09)------------EmptyRelation + + +# Test CROSS JOIN LATERAL syntax (execution) +# TODO: https://github.com/apache/datafusion/issues/10048 +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(UInt32, Column \{ relation: Some\(Bare \{ table: "t1" \}\), name: "t1_int" \}\) +select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i); + + +# Test INNER JOIN LATERAL syntax (planning) +query TT +explain select t1_id, t1_name, i from join_t1 t2 inner join lateral (select * from unnest(generate_series(1, t1_int))) as series(i) on(t1_id > i); +---- +logical_plan +01)Inner Join: Filter: CAST(t2.t1_id AS Int64) > series.i +02)--SubqueryAlias: t2 +03)----TableScan: join_t1 projection=[t1_id, t1_name] +04)--SubqueryAlias: series +05)----Subquery: +06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) AS i +07)--------Unnest: lists[UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int)))] 
structs[] +08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) +09)------------EmptyRelation + + +# Test INNER JOIN LATERAL syntax (execution) +# TODO: https://github.com/apache/datafusion/issues/10048 +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(UInt32, Column \{ relation: Some\(Bare \{ table: "t2" \}\), name: "t1_int" \}\) +select t1_id, t1_name, i from join_t1 t2 inner join lateral (select * from unnest(generate_series(1, t1_int))) as series(i) on(t1_id > i); + +# Test RIGHT JOIN LATERAL syntax (unsupported) +query error DataFusion error: This feature is not implemented: LATERAL syntax is not supported for FULL OUTER and RIGHT \[OUTER \| ANTI \| SEMI\] joins +select t1_id, t1_name, i from join_t1 t1 right join lateral (select * from unnest(generate_series(1, t1_int))) as series(i); + + # Functional dependencies across a join statement ok CREATE TABLE sales_global ( From ecb5a9144819e3954c073eebcb4bc2bb5a173cf1 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Wed, 21 Aug 2024 02:33:58 +0800 Subject: [PATCH 346/357] vectorize random() scalar function (#12078) --- datafusion/functions/Cargo.toml | 5 +++ datafusion/functions/benches/random.rs | 49 +++++++++++++++++++++++++ datafusion/functions/src/math/random.rs | 7 +++- 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 datafusion/functions/benches/random.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2b3f80fc930bf..9ef020b772f04 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -156,3 +156,8 @@ required-features = ["unicode_expressions"] harness = false name = "repeat" required-features = ["string_expressions"] + +[[bench]] +harness = false +name = "random" +required-features = ["math_expressions"] diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs new file mode 100644 index 0000000000000..a721836bb68ce --- /dev/null +++ b/datafusion/functions/benches/random.rs @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ScalarUDFImpl; +use datafusion_functions::math::random::RandomFunc; + +fn criterion_benchmark(c: &mut Criterion) { + let random_func = RandomFunc::new(); + + // Benchmark to evaluate 1M rows in batch size 8192 + let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows + c.bench_function("random_1M_rows_batch_8192", |b| { + b.iter(|| { + for _ in 0..iterations { + black_box(random_func.invoke_no_args(8192).unwrap()); + } + }) + }); + + // Benchmark to evaluate 1M rows in batch size 128 + let iterations_128 = 1_000_000 / 128; // Calculate how many iterations are needed to reach approximately 1M rows with batch size 128 + c.bench_function("random_1M_rows_batch_128", |b| { + b.iter(|| { + for _ in 0..iterations_128 { + black_box(random_func.invoke_no_args(128).unwrap()); + } + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs index b5eece212a3be..20591a02a930d 100644 --- a/datafusion/functions/src/math/random.rs +++ b/datafusion/functions/src/math/random.rs @@ -69,8 +69,11 @@ impl ScalarUDFImpl for RandomFunc { fn invoke_no_args(&self, num_rows: usize) -> Result { let mut rng = thread_rng(); - let values = std::iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(num_rows); - let array = Float64Array::from_iter_values(values); + let mut values = vec![0.0; num_rows]; + // Equivalent to set each element with rng.gen_range(0.0..1.0), but more efficient + rng.fill(&mut values[..]); + let array = Float64Array::from(values); + Ok(ColumnarValue::Array(Arc::new(array))) } } From 9bd183f4171b01bc72f869b92b55dca84d3dd3ae Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Wed, 21 Aug 2024 06:55:36 +1200 Subject: [PATCH 347/357] functions: support strpos with mixed string types (#12072) Signed-off-by: Nick Cameron --- datafusion/functions/src/unicode/strpos.rs | 81 ++++++++++++++++++---- datafusion/functions/src/utils.rs | 2 +- 2 files changed, 70 insertions(+), 13 deletions(-) diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 395fd0b77d127..702baf6e8fa77 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -78,10 +78,18 @@ impl ScalarUDFImpl for StrposFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => make_scalar_function(strpos::, vec![])(args), - DataType::LargeUtf8 => { - make_scalar_function(strpos::, vec![])(args) + match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8, DataType::Utf8) => { + make_scalar_function(strpos::, vec![])(args) + } + (DataType::Utf8, DataType::LargeUtf8) => { + make_scalar_function(strpos::, vec![])(args) + } + (DataType::LargeUtf8, DataType::Utf8) => { + make_scalar_function(strpos::, vec![])(args) + } + (DataType::LargeUtf8, DataType::LargeUtf8) => { + make_scalar_function(strpos::, vec![])(args) } other => exec_err!("Unsupported data type {other:?} for function strpos"), } @@ -95,15 +103,18 @@ impl ScalarUDFImpl for StrposFunc { /// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.) 
/// strpos('high', 'ig') = 2 /// The implementation uses UTF-8 code points as characters -fn strpos(args: &[ArrayRef]) -> Result +fn strpos( + args: &[ArrayRef], +) -> Result where - T::Native: OffsetSizeTrait, + T0::Native: OffsetSizeTrait, + T1::Native: OffsetSizeTrait, { - let string_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; + let string_array: &GenericStringArray = + as_generic_string_array::(&args[0])?; - let substring_array: &GenericStringArray = - as_generic_string_array::(&args[1])?; + let substring_array: &GenericStringArray = + as_generic_string_array::(&args[1])?; let result = string_array .iter() @@ -112,7 +123,7 @@ where (Some(string), Some(substring)) => { // the find method returns the byte index of the substring // Next, we count the number of the chars until that byte - T::Native::from_usize( + T0::Native::from_usize( string .find(substring) .map(|x| string[..x].chars().count() + 1) @@ -121,7 +132,53 @@ where } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } + +#[cfg(test)] +mod test { + use super::*; + use crate::utils::test::test_function; + use arrow::{ + array::{Array as _, Int32Array, Int64Array}, + datatypes::DataType::{Int32, Int64}, + }; + use datafusion_common::ScalarValue; + + macro_rules! test_strpos { + ($lhs:literal, $rhs:literal -> $result:literal; $t1:ident $t2:ident $t3:ident $t4:ident $t5:ident) => { + test_function!( + StrposFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::$t1(Some($lhs.to_owned()))), + ColumnarValue::Scalar(ScalarValue::$t2(Some($rhs.to_owned()))), + ], + Ok(Some($result)), + $t3, + $t4, + $t5 + ) + }; + } + + #[test] + fn strpos() { + test_strpos!("foo", "bar" -> 0; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("foobar", "foo" -> 1; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("foobar", "bar" -> 4; Utf8 Utf8 i32 Int32 Int32Array); + + test_strpos!("foo", "bar" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("foobar", "foo" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("foobar", "bar" -> 4; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + + test_strpos!("foo", "bar" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("foobar", "foo" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("foobar", "bar" -> 4; Utf8 LargeUtf8 i32 Int32 Int32Array); + + test_strpos!("foo", "bar" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("foobar", "foo" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("foobar", "bar" -> 4; LargeUtf8 Utf8 i64 Int64 Int64Array); + } +} diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 7b367174006d4..d36c5473ba01d 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -144,7 +144,7 @@ pub mod test { assert_eq!(return_type.unwrap(), $EXPECTED_DATA_TYPE); let result = func.invoke($ARGS); - assert_eq!(result.is_ok(), true); + assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let len = $ARGS .iter() From e6e1eb229440591263c82bb2b913a4d5a16f9b70 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Tue, 20 Aug 2024 20:58:43 +0200 Subject: [PATCH 348/357] Update to `clap` 4.5.16 (#12064) --- datafusion-cli/Cargo.lock | 190 +++++++++++++++-------------- datafusion-cli/Cargo.toml | 2 +- datafusion-cli/src/command.rs | 2 +- datafusion-cli/src/main.rs | 47 +++---- datafusion-cli/src/pool_type.rs | 2 +- datafusion-cli/src/print_format.rs | 4 +- datafusion/sqllogictest/Cargo.toml | 2 +- 7 
files changed, 122 insertions(+), 127 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b5637f785fb2d..a164b74c55a5e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -82,12 +82,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "apache-avro" version = "0.16.0" @@ -177,7 +220,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.5", + "hashbrown", "num", ] @@ -272,7 +315,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.4.0", + "indexmap", "lexical-core", "num", "serde", @@ -927,42 +970,43 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.25" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ - "atty", - "bitflags 1.3.2", + "clap_builder", "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" +dependencies = [ + "anstream", + "anstyle", "clap_lex", - "indexmap 1.9.3", - "once_cell", "strsim", - "termcolor", - "textwrap", ] [[package]] name = "clap_derive" -version = "3.2.25" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ - "heck 0.4.1", - "proc-macro-error", + "heck 0.5.0", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.74", ] [[package]] name = "clap_lex" -version = "0.2.4" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "clipboard-win" @@ -975,6 +1019,12 @@ dependencies = [ "winapi", ] +[[package]] +name = 
"colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + [[package]] name = "comfy-table" version = "7.1.1" @@ -1122,7 +1172,7 @@ checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown 0.14.5", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", @@ -1164,8 +1214,8 @@ dependencies = [ "futures", "glob", "half", - "hashbrown 0.14.5", - "indexmap 2.4.0", + "hashbrown", + "indexmap", "itertools 0.12.1", "log", "num-traits", @@ -1237,7 +1287,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown 0.14.5", + "hashbrown", "instant", "libc", "num_cpus", @@ -1264,7 +1314,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", + "hashbrown", "log", "object_store", "parking_lot", @@ -1315,7 +1365,7 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "hashbrown 0.14.5", + "hashbrown", "hex", "itertools 0.12.1", "log", @@ -1398,8 +1448,8 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", - "indexmap 2.4.0", + "hashbrown", + "indexmap", "itertools 0.12.1", "log", "paste", @@ -1426,9 +1476,9 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown", "hex", - "indexmap 2.4.0", + "indexmap", "itertools 0.12.1", "log", "paste", @@ -1444,7 +1494,7 @@ dependencies = [ "arrow", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown", "rand", ] @@ -1496,8 +1546,8 @@ dependencies = [ "datafusion-physical-expr-functions-aggregate", "futures", "half", - "hashbrown 0.14.5", - "indexmap 2.4.0", + "hashbrown", + "indexmap", "itertools 0.12.1", "log", "once_cell", @@ -1862,7 +1912,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.4.0", + "indexmap", "slab", "tokio", "tokio-util", @@ -1881,7 +1931,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.4.0", + "indexmap", "slab", "tokio", "tokio-util", @@ -1899,12 +1949,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -2161,16 +2205,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.4.0" @@ -2178,7 +2212,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown", ] [[package]] @@ -2205,6 +2239,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.12.1" @@ -2343,7 +2383,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" dependencies = [ "core2", - "hashbrown 0.14.5", + "hashbrown", "rle-decode-fast", ] @@ -2644,12 +2684,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - [[package]] name = "outref" version = "0.5.1" @@ -2700,7 +2734,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown", "lz4_flex", "num", "num-bigint", @@ -2743,7 +2777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.4.0", + "indexmap", ] [[package]] @@ -2867,30 +2901,6 @@ dependencies = [ "termtree", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" version = "1.0.86" @@ -3576,9 +3586,9 @@ checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" @@ -3683,12 +3693,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" -[[package]] -name = "textwrap" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" - [[package]] name = "thiserror" version = "1.0.63" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 252d056e8b83f..0a4523a1c04ea 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -34,7 +34,7 @@ arrow = { version = "52.2.0" } async-trait = "0.1.73" aws-config = "0.55" aws-credential-types = "0.55" -clap = { version = "3", features = ["derive", "cargo"] } +clap = { version = "4.5.16", features = ["derive", "cargo"] } datafusion = { path = "../datafusion/core", version = "41.0.0", features = [ "avro", "crypto_expressions", diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 05c00d634c942..f0eb58a233910 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -22,7 +22,7 @@ use crate::exec::{exec_and_print, exec_from_lines}; use crate::functions::{display_all_functions, Function}; use crate::print_format::PrintFormat; use 
crate::print_options::PrintOptions; -use clap::ArgEnum; +use clap::ValueEnum; use datafusion::arrow::array::{ArrayRef, StringArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 1810d3cef57cd..6e94e6ea4186e 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -49,7 +49,7 @@ struct Args { short = 'p', long, help = "Path to your data, default to current directory", - validator(is_valid_data_dir) + value_parser(parse_valid_data_dir) )] data_path: Option, @@ -57,16 +57,16 @@ struct Args { short = 'b', long, help = "The batch size of each query, or use DataFusion default", - validator(is_valid_batch_size) + value_parser(parse_batch_size) )] batch_size: Option, #[clap( short = 'c', long, - multiple_values = true, + num_args = 0.., help = "Execute the given command string(s), then exit. Commands are expected to be non empty.", - validator(is_valid_command) + value_parser(parse_command) )] command: Vec, @@ -74,30 +74,30 @@ struct Args { short = 'm', long, help = "The memory pool limitation (e.g. '10g'), default to None (no limit)", - validator(is_valid_memory_pool_size) + value_parser(extract_memory_pool_size) )] - memory_limit: Option, + memory_limit: Option, #[clap( short, long, - multiple_values = true, + num_args = 0.., help = "Execute commands from file(s), then exit", - validator(is_valid_file) + value_parser(parse_valid_file) )] file: Vec, #[clap( short = 'r', long, - multiple_values = true, + num_args = 0.., help = "Run the provided files on startup instead of ~/.datafusionrc", - validator(is_valid_file), + value_parser(parse_valid_file), conflicts_with = "file" )] rc: Option>, - #[clap(long, arg_enum, default_value_t = PrintFormat::Automatic)] + #[clap(long, value_enum, default_value_t = PrintFormat::Automatic)] format: PrintFormat, #[clap( @@ -160,8 +160,6 @@ async fn main_inner() -> Result<()> { let rt_config = // set memory pool size if let Some(memory_limit) = args.memory_limit { - // unwrap is safe here because is_valid_memory_pool_size already checked the value - let memory_limit = extract_memory_pool_size(&memory_limit).unwrap(); // set memory pool type match args.mem_pool_type { PoolType::Fair => rt_config @@ -235,39 +233,32 @@ fn create_runtime_env(rn_config: RuntimeConfig) -> Result { RuntimeEnv::new(rn_config) } -fn is_valid_file(dir: &str) -> Result<(), String> { +fn parse_valid_file(dir: &str) -> Result { if Path::new(dir).is_file() { - Ok(()) + Ok(dir.to_string()) } else { Err(format!("Invalid file '{}'", dir)) } } -fn is_valid_data_dir(dir: &str) -> Result<(), String> { +fn parse_valid_data_dir(dir: &str) -> Result { if Path::new(dir).is_dir() { - Ok(()) + Ok(dir.to_string()) } else { Err(format!("Invalid data directory '{}'", dir)) } } -fn is_valid_batch_size(size: &str) -> Result<(), String> { +fn parse_batch_size(size: &str) -> Result { match size.parse::() { - Ok(size) if size > 0 => Ok(()), + Ok(size) if size > 0 => Ok(size), _ => Err(format!("Invalid batch size '{}'", size)), } } -fn is_valid_memory_pool_size(size: &str) -> Result<(), String> { - match extract_memory_pool_size(size) { - Ok(_) => Ok(()), - Err(e) => Err(e), - } -} - -fn is_valid_command(command: &str) -> Result<(), String> { +fn parse_command(command: &str) -> Result { if !command.is_empty() { - Ok(()) + Ok(command.to_string()) } else { Err("-c flag expects only non empty commands".to_string()) } diff --git a/datafusion-cli/src/pool_type.rs 
b/datafusion-cli/src/pool_type.rs index 25763eba5c8cb..269790b61f5a5 100644 --- a/datafusion-cli/src/pool_type.rs +++ b/datafusion-cli/src/pool_type.rs @@ -20,7 +20,7 @@ use std::{ str::FromStr, }; -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Debug, Clone)] pub enum PoolType { Greedy, Fair, diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index c95bde7fc6c71..92cb106d622bf 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -30,7 +30,7 @@ use datafusion::common::format::DEFAULT_FORMAT_OPTIONS; use datafusion::error::Result; /// Allow records to be printed in different formats -#[derive(Debug, PartialEq, Eq, clap::ArgEnum, Clone, Copy)] +#[derive(Debug, PartialEq, Eq, clap::ValueEnum, Clone, Copy)] pub enum PrintFormat { Csv, Tsv, @@ -44,7 +44,7 @@ impl FromStr for PrintFormat { type Err = String; fn from_str(s: &str) -> Result { - clap::ArgEnum::from_str(s, true) + clap::ValueEnum::from_str(s, true) } } diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 28ef6fe9adb66..36aff613962be 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -39,7 +39,7 @@ async-trait = { workspace = true } bigdecimal = { workspace = true } bytes = { workspace = true, optional = true } chrono = { workspace = true, optional = true } -clap = { version = "4.4.8", features = ["derive", "env"] } +clap = { version = "4.5.16", features = ["derive", "env"] } datafusion = { workspace = true, default-features = true, features = ["avro"] } datafusion-common = { workspace = true, default-features = true } datafusion-common-runtime = { workspace = true, default-features = true } From 6786f1592f6b923210673c0246ea121a714aec49 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Wed, 21 Aug 2024 08:47:49 +0800 Subject: [PATCH 349/357] Fix the schema mismatch between logical and physical for aggregate function, add `AggregateUDFImpl::is_null` (#11989) * schema assertion and fix the mismatch from logical and physical Signed-off-by: jayzhan211 * add more msg Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * rm test1 Signed-off-by: jayzhan211 * nullable for scalar func Signed-off-by: jayzhan211 * nullable Signed-off-by: jayzhan211 * rm field Signed-off-by: jayzhan211 * rm unsafe block and use internal error Signed-off-by: jayzhan211 * rm func_name Signed-off-by: jayzhan211 * rm nullable option Signed-off-by: jayzhan211 * add test Signed-off-by: jayzhan211 * add more msg Signed-off-by: jayzhan211 * fix test Signed-off-by: jayzhan211 * rm row number Signed-off-by: jayzhan211 * Update datafusion/expr/src/udaf.rs Co-authored-by: Andrew Lamb * Update datafusion/expr/src/udaf.rs Co-authored-by: Andrew Lamb * fix failed test from #12050 Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * add doc Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 Co-authored-by: Andrew Lamb --- .../src/physical_optimizer/enforce_sorting.rs | 34 +++++++------- .../src/physical_optimizer/sanity_checker.rs | 4 +- datafusion/core/src/physical_planner.rs | 11 +++-- datafusion/expr/src/expr_schema.rs | 26 +++++++---- datafusion/expr/src/logical_plan/plan.rs | 12 ++--- datafusion/expr/src/udaf.rs | 29 +++++++++++- datafusion/expr/src/udf.rs | 8 ++++ .../src/aggregate.rs | 10 +++-- datafusion/functions-aggregate/src/count.rs | 10 ++++- datafusion/functions/src/core/arrow_cast.rs | 8 +++- datafusion/functions/src/core/coalesce.rs | 9 +++- .../src/analyzer/count_wildcard_rule.rs | 
2 +- .../optimizer/src/analyzer/type_coercion.rs | 31 +++++++++---- .../src/aggregate.rs | 18 +++++++- .../physical-expr/src/scalar_function.rs | 45 ++++++++++++------- .../physical-expr/src/window/aggregate.rs | 11 +++-- .../physical-expr/src/window/built_in.rs | 1 - .../src/window/sliding_aggregate.rs | 5 ++- .../src/windows/bounded_window_agg_exec.rs | 23 ++-------- datafusion/physical-plan/src/windows/mod.rs | 1 + datafusion/physical-plan/src/windows/utils.rs | 35 +++++++++++++++ .../src/windows/window_agg_exec.rs | 19 +------- datafusion/sql/src/select.rs | 8 ++-- .../test_files/count_star_rule.slt | 2 +- datafusion/sqllogictest/test_files/insert.slt | 6 +-- .../test_files/insert_to_external.slt | 4 +- datafusion/sqllogictest/test_files/union.slt | 15 +++++++ datafusion/sqllogictest/test_files/window.slt | 22 ++++----- 28 files changed, 274 insertions(+), 135 deletions(-) create mode 100644 datafusion/physical-plan/src/windows/utils.rs diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index bda6d598b6ff3..14afe35466332 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -845,17 +845,17 @@ mod tests { let physical_plan = bounded_window_exec("non_nullable_col", sort_exprs, filter); - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " FilterExec: NOT non_nullable_col@1", " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]"]; - let expected_optimized = ["WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", + let expected_optimized = ["WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", " FilterExec: NOT non_nullable_col@1", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]"]; @@ -1722,7 +1722,7 @@ mod tests { // corresponding SortExecs together. Also, the inputs of these `SortExec`s // are not necessarily the same to be able to remove them. let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", " UnionExec", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -1730,7 +1730,7 @@ mod tests { " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; let expected_optimized = [ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", + "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", " SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", @@ -1760,14 +1760,14 @@ mod tests { // The `WindowAggExec` can get its required sorting from the leaf nodes directly. 
// The unnecessary SortExecs should be removed - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; - let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -2060,15 +2060,15 @@ mod tests { let physical_plan = bounded_window_exec("non_nullable_col", sort_exprs1, window_agg2); - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " 
BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]"]; - let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2134,7 +2134,7 @@ mod tests { let expected_input = vec![ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_eq!( @@ -2386,7 +2386,7 @@ mod tests { let physical_plan = bounded_window_exec("a", sort_exprs, spm); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], 
mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -2394,7 +2394,7 @@ mod tests { " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", " CoalescePartitionsExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index 6e37c3f40ffaf..bd80d31224ef9 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -437,7 +437,7 @@ mod tests { let sort = sort_exec(sort_exprs.clone(), source); let bw = bounded_window_exec("c9", sort_exprs, sort); assert_plan(bw.as_ref(), vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]" ]); @@ -460,7 +460,7 @@ mod tests { )]; let bw = bounded_window_exec("c9", sort_exprs, source); assert_plan(bw.as_ref(), vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " MemoryExec: partitions=1, partition_sizes=[0]" ]); // Order requirement of the `BoundedWindowAggExec` is not satisfied. We expect to receive error during sanity check. 
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 41ab4ccc98ff7..6536f9a01439f 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -670,6 +670,12 @@ impl DefaultPhysicalPlanner { let input_exec = children.one()?; let physical_input_schema = input_exec.schema(); let logical_input_schema = input.as_ref().schema(); + let physical_input_schema_from_logical: Arc = + logical_input_schema.as_ref().clone().into(); + + if physical_input_schema != physical_input_schema_from_logical { + return internal_err!("Physical input schema should be the same as the one converted from logical input schema."); + } let groups = self.create_grouping_physical_expr( group_expr, @@ -1548,7 +1554,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, name: Option, logical_input_schema: &DFSchema, - _physical_input_schema: &Schema, + physical_input_schema: &Schema, execution_props: &ExecutionProps, ) -> Result { match e { @@ -1599,11 +1605,10 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( let ordering_reqs: Vec = physical_sort_exprs.clone().unwrap_or(vec![]); - let schema: Schema = logical_input_schema.clone().into(); let agg_expr = AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec()) .order_by(ordering_reqs.to_vec()) - .schema(Arc::new(schema)) + .schema(Arc::new(physical_input_schema.to_owned())) .alias(name) .with_ignore_nulls(ignore_nulls) .with_distinct(*distinct) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f6489fef14a16..10ec10e61239f 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -335,18 +335,28 @@ impl ExprSchemable for Expr { } } Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), + Expr::ScalarFunction(ScalarFunction { func, args }) => { + Ok(func.is_nullable(args, input_schema)) + } Expr::AggregateFunction(AggregateFunction { func, .. }) => { - // TODO: UDF should be able to customize nullability - if func.name() == "count" { - Ok(false) - } else { - Ok(true) - } + Ok(func.is_nullable()) } + Expr::WindowFunction(WindowFunction { fun, .. }) => match fun { + WindowFunctionDefinition::BuiltInWindowFunction(func) => { + if func.name() == "RANK" + || func.name() == "NTILE" + || func.name() == "CUME_DIST" + { + Ok(false) + } else { + Ok(true) + } + } + WindowFunctionDefinition::AggregateUDF(func) => Ok(func.is_nullable()), + WindowFunctionDefinition::WindowUDF(udwf) => Ok(udwf.nullable()), + }, Expr::ScalarVariable(_, _) | Expr::TryCast { .. } - | Expr::ScalarFunction(..) - | Expr::WindowFunction { .. } | Expr::Unnest(_) | Expr::Placeholder(_) => Ok(true), Expr::IsNull(_) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f9b30351677d0..f93b7c0fedd09 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2015,10 +2015,9 @@ impl Projection { /// produced by the projection operation. If the schema computation is successful, /// the `Result` will contain the schema; otherwise, it will contain an error. 
pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result> { - let mut schema = DFSchema::new_with_metadata( - exprlist_to_fields(exprs, input)?, - input.schema().metadata().clone(), - )?; + let metadata = input.schema().metadata().clone(); + let mut schema = + DFSchema::new_with_metadata(exprlist_to_fields(exprs, input)?, metadata)?; schema = schema.with_functional_dependencies(calc_func_dependencies_for_project( exprs, input, )?)?; @@ -2655,7 +2654,10 @@ impl Aggregate { qualified_fields.extend(exprlist_to_fields(aggr_expr.as_slice(), &input)?); - let schema = DFSchema::new_with_metadata(qualified_fields, HashMap::new())?; + let schema = DFSchema::new_with_metadata( + qualified_fields, + input.schema().metadata().clone(), + )?; Self::try_new_with_schema(input, group_expr, aggr_expr, Arc::new(schema)) } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index d136aeaf09087..cb278c7679749 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -25,7 +25,7 @@ use std::vec; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, not_impl_err, Result}; +use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; use crate::expr::AggregateFunction; use crate::function::{ @@ -163,6 +163,10 @@ impl AggregateUDF { self.inner.name() } + pub fn is_nullable(&self) -> bool { + self.inner.is_nullable() + } + /// Returns the aliases for this function. pub fn aliases(&self) -> &[String] { self.inner.aliases() @@ -257,6 +261,11 @@ impl AggregateUDF { pub fn is_descending(&self) -> Option { self.inner.is_descending() } + + /// See [`AggregateUDFImpl::default_value`] for more details. + pub fn default_value(&self, data_type: &DataType) -> Result { + self.inner.default_value(data_type) + } } impl From for AggregateUDF @@ -342,6 +351,16 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// the arguments fn return_type(&self, arg_types: &[DataType]) -> Result; + /// Whether the aggregate function is nullable. + /// + /// Nullable means that that the function could return `null` for any inputs. + /// For example, aggregate functions like `COUNT` always return a non null value + /// but others like `MIN` will return `NULL` if there is nullable input. + /// Note that if the function is declared as *not* nullable, make sure the [`AggregateUDFImpl::default_value`] is `non-null` + fn is_nullable(&self) -> bool { + true + } + /// Return a new [`Accumulator`] that aggregates values for a specific /// group during query execution. /// @@ -552,6 +571,14 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { fn is_descending(&self) -> Option { None } + + /// Returns default value of the function given the input is all `null`. + /// + /// Most of the aggregate function return Null if input is Null, + /// while `count` returns 0 if input is Null + fn default_value(&self, data_type: &DataType) -> Result { + ScalarValue::try_from(data_type) + } } pub enum ReversedUDAF { diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index f5434726e23d7..a4584038e48bc 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -205,6 +205,10 @@ impl ScalarUDF { self.inner.invoke(args) } + pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool { + self.inner.is_nullable(args, schema) + } + /// Invoke the function without `args` but number of rows, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke_no_args`] for more details. 
@@ -416,6 +420,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { self.return_type(arg_types) } + fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool { + true + } + /// Invoke the function on `args`, returning the appropriate result /// /// The function will be invoked passed with the slice of [`ColumnarValue`] diff --git a/datafusion/functions-aggregate-common/src/aggregate.rs b/datafusion/functions-aggregate-common/src/aggregate.rs index 016e54e688357..698d1350cb619 100644 --- a/datafusion/functions-aggregate-common/src/aggregate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate.rs @@ -19,9 +19,8 @@ //! (built-in and custom) need to satisfy. use crate::order::AggregateOrderSensitivity; -use arrow::datatypes::Field; -use datafusion_common::exec_err; -use datafusion_common::{not_impl_err, Result}; +use arrow::datatypes::{DataType, Field}; +use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; use datafusion_expr_common::accumulator::Accumulator; use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; @@ -171,6 +170,11 @@ pub trait AggregateExpr: Send + Sync + Debug + PartialEq { fn get_minmax_desc(&self) -> Option<(Field, bool)> { None } + + /// Returns default value of the function given the input is Null + /// Most of the aggregate function return Null if input is Null, + /// while `count` returns 0 if input is Null + fn default_value(&self, data_type: &DataType) -> Result; } /// Stores the physical expressions used inside the `AggregateExpr`. diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 04b1921c7b9e5..417e28e72a71f 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -121,6 +121,10 @@ impl AggregateUDFImpl for Count { Ok(DataType::Int64) } + fn is_nullable(&self) -> bool { + false + } + fn state_fields(&self, args: StateFieldsArgs) -> Result> { if args.is_distinct { Ok(vec![Field::new_list( @@ -133,7 +137,7 @@ impl AggregateUDFImpl for Count { Ok(vec![Field::new( format_state_name(args.name, "count"), DataType::Int64, - true, + false, )]) } } @@ -283,6 +287,10 @@ impl AggregateUDFImpl for Count { fn reverse_expr(&self) -> ReversedUDAF { ReversedUDAF::Identical } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Int64(Some(0))) + } } #[derive(Debug)] diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index c4db3e77049df..a1b74228a5039 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -26,7 +26,9 @@ use datafusion_common::{ }; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Expr, ExprSchemable, ScalarUDFImpl, Signature, Volatility, +}; /// Implements casting to arbitrary arrow types (rather than SQL types) /// @@ -87,6 +89,10 @@ impl ScalarUDFImpl for ArrowCastFunc { internal_err!("arrow_cast should return type from exprs") } + fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool { + args.iter().any(|e| e.nullable(schema).ok().unwrap_or(true)) + } + fn return_type_from_exprs( &self, args: &[Expr], diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 15a3ddd9d6e9d..19db58c181e7c 100644 --- 
a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -22,9 +22,9 @@ use arrow::compute::kernels::zip::zip; use arrow::compute::{and, is_not_null, is_null}; use arrow::datatypes::DataType; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_err, ExprSchema, Result}; use datafusion_expr::type_coercion::binary::type_union_resolution; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; #[derive(Debug)] @@ -63,6 +63,11 @@ impl ScalarUDFImpl for CoalesceFunc { Ok(arg_types[0].clone()) } + // If all the element in coalesce is non-null, the result is non-null + fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool { + args.iter().any(|e| e.nullable(schema).ok().unwrap_or(true)) + } + /// coalesce evaluates to the first value which is not NULL fn invoke(&self, args: &[ColumnarValue]) -> Result { // do not accept 0 arguments. diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 593dab2bc9a21..e114efb99960e 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -240,7 +240,7 @@ mod tests { .build()?; let expected = "Projection: count(Int64(1)) AS count(*) [count(*):Int64]\ - \n WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] [a:UInt32, b:UInt32, c:UInt32, count(*) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING:Int64;N]\ + \n WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] [a:UInt32, b:UInt32, c:UInt32, count(*) ORDER BY [test.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING:Int64]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_plan_eq(plan, expected) } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 7251a95d77d65..68ab2e13005f3 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -17,7 +17,6 @@ //! 
Optimizer rule for type validation and coercion -use std::collections::HashMap; use std::sync::Arc; use itertools::izip; @@ -822,9 +821,18 @@ fn coerce_union_schema(inputs: &[Arc]) -> Result { .iter() .map(|f| f.is_nullable()) .collect::>(); + let mut union_field_meta = base_schema + .fields() + .iter() + .map(|f| f.metadata().clone()) + .collect::>(); + + let mut metadata = base_schema.metadata().clone(); for (i, plan) in inputs.iter().enumerate().skip(1) { let plan_schema = plan.schema(); + metadata.extend(plan_schema.metadata().clone()); + if plan_schema.fields().len() != base_schema.fields().len() { return plan_err!( "Union schemas have different number of fields: \ @@ -834,11 +842,13 @@ fn coerce_union_schema(inputs: &[Arc]) -> Result { plan_schema.fields().len() ); } + // coerce data type and nullablity for each field - for (union_datatype, union_nullable, plan_field) in izip!( + for (union_datatype, union_nullable, union_field_map, plan_field) in izip!( union_datatypes.iter_mut(), union_nullabilities.iter_mut(), - plan_schema.fields() + union_field_meta.iter_mut(), + plan_schema.fields().iter() ) { let coerced_type = comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else( @@ -852,21 +862,26 @@ fn coerce_union_schema(inputs: &[Arc]) -> Result { ) }, )?; + *union_datatype = coerced_type; *union_nullable = *union_nullable || plan_field.is_nullable(); + union_field_map.extend(plan_field.metadata().clone()); } } let union_qualified_fields = izip!( base_schema.iter(), union_datatypes.into_iter(), - union_nullabilities + union_nullabilities, + union_field_meta.into_iter() ) - .map(|((qualifier, field), datatype, nullable)| { - let field = Arc::new(Field::new(field.name().clone(), datatype, nullable)); - (qualifier.cloned(), field) + .map(|((qualifier, field), datatype, nullable, metadata)| { + let mut field = Field::new(field.name().clone(), datatype, nullable); + field.set_metadata(metadata); + (qualifier.cloned(), field.into()) }) .collect::>(); - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new()) + + DFSchema::new_with_metadata(union_qualified_fields, metadata) } /// See `` diff --git a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs index 8185f0fdd51f6..aa1d1999a3395 100644 --- a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs +++ b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs @@ -16,6 +16,7 @@ // under the License. 
use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion_common::ScalarValue; use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_expr::expr::create_function_physical_name; use datafusion_expr::AggregateUDF; @@ -109,6 +110,7 @@ impl AggregateExprBuilder { )?; let data_type = fun.return_type(&input_exprs_types)?; + let is_nullable = fun.is_nullable(); let name = match alias { // TODO: Ideally, we should build the name from physical expressions None => create_function_physical_name(fun.name(), is_distinct, &[], None)?, @@ -127,6 +129,7 @@ impl AggregateExprBuilder { is_distinct, input_types: input_exprs_types, is_reversed, + is_nullable, })) } @@ -194,6 +197,7 @@ pub struct AggregateFunctionExpr { is_distinct: bool, is_reversed: bool, input_types: Vec, + is_nullable: bool, } impl AggregateFunctionExpr { @@ -216,6 +220,10 @@ impl AggregateFunctionExpr { pub fn is_reversed(&self) -> bool { self.is_reversed } + + pub fn is_nullable(&self) -> bool { + self.is_nullable + } } impl AggregateExpr for AggregateFunctionExpr { @@ -241,7 +249,11 @@ impl AggregateExpr for AggregateFunctionExpr { } fn field(&self) -> Result { - Ok(Field::new(&self.name, self.data_type.clone(), true)) + Ok(Field::new( + &self.name, + self.data_type.clone(), + self.is_nullable, + )) } fn create_accumulator(&self) -> Result> { @@ -435,6 +447,10 @@ impl AggregateExpr for AggregateFunctionExpr { .is_descending() .and_then(|flag| self.field().ok().map(|f| (f, flag))) } + + fn default_value(&self, data_type: &DataType) -> Result { + self.fun.default_value(data_type) + } } impl PartialEq for AggregateFunctionExpr { diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 83272fc9b2691..130c335d1c95e 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -51,6 +51,7 @@ pub struct ScalarFunctionExpr { name: String, args: Vec>, return_type: DataType, + nullable: bool, } impl Debug for ScalarFunctionExpr { @@ -77,6 +78,7 @@ impl ScalarFunctionExpr { name: name.to_owned(), args, return_type, + nullable: true, } } @@ -99,6 +101,15 @@ impl ScalarFunctionExpr { pub fn return_type(&self) -> &DataType { &self.return_type } + + pub fn with_nullable(mut self, nullable: bool) -> Self { + self.nullable = nullable; + self + } + + pub fn nullable(&self) -> bool { + self.nullable + } } impl fmt::Display for ScalarFunctionExpr { @@ -118,7 +129,7 @@ impl PhysicalExpr for ScalarFunctionExpr { } fn nullable(&self, _input_schema: &Schema) -> Result { - Ok(true) + Ok(self.nullable) } fn evaluate(&self, batch: &RecordBatch) -> Result { @@ -151,12 +162,15 @@ impl PhysicalExpr for ScalarFunctionExpr { self: Arc, children: Vec>, ) -> Result> { - Ok(Arc::new(ScalarFunctionExpr::new( - &self.name, - Arc::clone(&self.fun), - children, - self.return_type().clone(), - ))) + Ok(Arc::new( + ScalarFunctionExpr::new( + &self.name, + Arc::clone(&self.fun), + children, + self.return_type().clone(), + ) + .with_nullable(self.nullable), + )) } fn evaluate_bounds(&self, children: &[&Interval]) -> Result { @@ -209,8 +223,6 @@ impl PartialEq for ScalarFunctionExpr { } /// Create a physical expression for the UDF. 
-/// -/// Arguments: pub fn create_physical_expr( fun: &ScalarUDF, input_phy_exprs: &[Arc], @@ -230,10 +242,13 @@ pub fn create_physical_expr( let return_type = fun.return_type_from_exprs(args, input_dfschema, &input_expr_types)?; - Ok(Arc::new(ScalarFunctionExpr::new( - fun.name(), - Arc::new(fun.clone()), - input_phy_exprs.to_vec(), - return_type, - ))) + Ok(Arc::new( + ScalarFunctionExpr::new( + fun.name(), + Arc::new(fun.clone()), + input_phy_exprs.to_vec(), + return_type, + ) + .with_nullable(fun.is_nullable(args, input_dfschema)), + )) } diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 5892f7f3f3b05..52015f4252179 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -176,9 +176,9 @@ impl AggregateWindowExpr for PlainAggregateWindowExpr { value_slice: &[ArrayRef], accumulator: &mut Box, ) -> Result { - let value = if cur_range.start == cur_range.end { - // We produce None if the window is empty. - ScalarValue::try_from(self.aggregate.field()?.data_type())? + if cur_range.start == cur_range.end { + self.aggregate + .default_value(self.aggregate.field()?.data_type()) } else { // Accumulate any new rows that have entered the window: let update_bound = cur_range.end - last_range.end; @@ -193,8 +193,7 @@ impl AggregateWindowExpr for PlainAggregateWindowExpr { .collect(); accumulator.update_batch(&update)? } - accumulator.evaluate()? - }; - Ok(value) + accumulator.evaluate() + } } } diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 04d359903eae9..8ff277db37dfd 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -26,7 +26,6 @@ use crate::expressions::PhysicalSortExpr; use crate::window::window_expr::{get_orderby_values, WindowFn}; use crate::window::{PartitionBatches, PartitionWindowAggStates, WindowState}; use crate::{reverse_order_bys, EquivalenceProperties, PhysicalExpr}; - use arrow::array::{new_empty_array, ArrayRef}; use arrow::compute::SortOptions; use arrow::datatypes::Field; diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index 50e9632b2196c..afa799e86953b 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -183,8 +183,8 @@ impl AggregateWindowExpr for SlidingAggregateWindowExpr { accumulator: &mut Box, ) -> Result { if cur_range.start == cur_range.end { - // We produce None if the window is empty. - ScalarValue::try_from(self.aggregate.field()?.data_type()) + self.aggregate + .default_value(self.aggregate.field()?.data_type()) } else { // Accumulate any new rows that have entered the window: let update_bound = cur_range.end - last_range.end; @@ -195,6 +195,7 @@ impl AggregateWindowExpr for SlidingAggregateWindowExpr { .collect(); accumulator.update_batch(&update)? 
} + // Remove rows that have now left the window: let retract_bound = cur_range.start - last_range.start; if retract_bound > 0 { diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 29ead35895fee..efb5dea1ec6e3 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -27,6 +27,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use super::utils::create_schema; use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ @@ -38,11 +39,11 @@ use crate::{ ExecutionPlanProperties, InputOrderMode, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, }; - +use ahash::RandomState; use arrow::{ array::{Array, ArrayRef, RecordBatchOptions, UInt32Builder}, compute::{concat, concat_batches, sort_to_indices}, - datatypes::{Schema, SchemaBuilder, SchemaRef}, + datatypes::SchemaRef, record_batch::RecordBatch, }; use datafusion_common::hash_utils::create_hashes; @@ -59,8 +60,6 @@ use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState, }; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; - -use ahash::RandomState; use futures::stream::Stream; use futures::{ready, StreamExt}; use hashbrown::raw::RawTable; @@ -852,20 +851,6 @@ impl SortedSearch { } } -fn create_schema( - input_schema: &Schema, - window_expr: &[Arc], -) -> Result { - let capacity = input_schema.fields().len() + window_expr.len(); - let mut builder = SchemaBuilder::with_capacity(capacity); - builder.extend(input_schema.fields.iter().cloned()); - // append results to the schema - for expr in window_expr { - builder.push(expr.field()?); - } - Ok(builder.finish()) -} - /// Stream for the bounded window aggregation plan. 
pub struct BoundedWindowAggStream { schema: SchemaRef, @@ -1736,7 +1721,7 @@ mod tests { let expected_plan = vec![ "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]@2 as col_2]", - " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", + " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", " StreamingTableExec: partition_sizes=1, projection=[sn, hash], infinite_source=true, output_ordering=[sn@0 ASC NULLS LAST]", ]; diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 154beb79f729e..f938f4410a992 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -46,6 +46,7 @@ use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilde use itertools::Itertools; mod bounded_window_agg_exec; +mod utils; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; diff --git a/datafusion/physical-plan/src/windows/utils.rs b/datafusion/physical-plan/src/windows/utils.rs new file mode 100644 index 0000000000000..3cf92daae0fb2 --- /dev/null +++ b/datafusion/physical-plan/src/windows/utils.rs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_schema::{Schema, SchemaBuilder}; +use datafusion_common::Result; +use datafusion_physical_expr::window::WindowExpr; +use std::sync::Arc; + +pub(crate) fn create_schema( + input_schema: &Schema, + window_expr: &[Arc<dyn WindowExpr>], +) -> Result<Schema> { + let capacity = input_schema.fields().len() + window_expr.len(); + let mut builder = SchemaBuilder::with_capacity(capacity); + builder.extend(input_schema.fields().iter().cloned()); + // append results to the schema + for expr in window_expr { + builder.push(expr.field()?); + } + Ok(builder.finish()) +} diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 1d5c6061a0f97..d2f7090fca170 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -22,6 +22,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use super::utils::create_schema; use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ @@ -33,10 +34,9 @@ use crate::{ ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, }; - use arrow::array::ArrayRef; use arrow::compute::{concat, concat_batches}; -use arrow::datatypes::{Schema, SchemaBuilder, SchemaRef}; +use arrow::datatypes::SchemaRef; use arrow::error::ArrowError; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; @@ -44,7 +44,6 @@ use datafusion_common::utils::{evaluate_partition_ranges, transpose}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalSortRequirement; - use futures::{ready, Stream, StreamExt}; /// Window execution plan @@ -265,20 +264,6 @@ impl ExecutionPlan for WindowAggExec { } } -fn create_schema( - input_schema: &Schema, - window_expr: &[Arc<dyn WindowExpr>], -) -> Result<Schema> { - let capacity = input_schema.fields().len() + window_expr.len(); - let mut builder = SchemaBuilder::with_capacity(capacity); - builder.extend(input_schema.fields().iter().cloned()); - // append results to the schema - for expr in window_expr { - builder.push(expr.field()?); - } - Ok(builder.finish()) -} - /// Compute the window aggregate columns fn compute_window_aggregates( window_expr: &[Arc<dyn WindowExpr>], diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index f42dec40149ff..4e0ce33f1334d 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -92,6 +92,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // having and group by clause may reference aliases defined in select projection let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?; + // Place the fields of the base plan at the front so that when there are references // with the same name, the fields of the base plan will be searched first.
// See https://github.com/apache/datafusion/issues/9162 @@ -288,9 +289,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan }; - let plan = self.order_by(plan, order_by_rex)?; - - Ok(plan) + self.order_by(plan, order_by_rex) } /// Try converting Expr(Unnest(Expr)) to Projection/Unnest/Projection @@ -519,8 +518,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { planner_context.set_outer_from_schema(left_schema); } planner_context.set_outer_from_schema(old_outer_from_schema); - - Ok(left.build()?) + left.build() } } } diff --git a/datafusion/sqllogictest/test_files/count_star_rule.slt b/datafusion/sqllogictest/test_files/count_star_rule.slt index b552e6053769a..3625da68b39ee 100644 --- a/datafusion/sqllogictest/test_files/count_star_rule.slt +++ b/datafusion/sqllogictest/test_files/count_star_rule.slt @@ -85,7 +85,7 @@ logical_plan 03)----TableScan: t1 projection=[a] physical_plan 01)ProjectionExec: expr=[a@0 as a, count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as count_a] -02)--WindowAggExec: wdw=[count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +02)--WindowAggExec: wdw=[count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 9115cb5325408..230ea4d98fc3a 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -68,7 +68,7 @@ physical_plan 02)--ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -128,7 +128,7 @@ physical_plan 01)DataSinkExec: sink=MemoryTable (partitions=1) 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -179,7 +179,7 @@ physical_plan 02)--ProjectionExec: expr=[a1@0 as a1, a2@1 as a2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: 
{} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index 8f6bafd92e419..c40f62c3ba801 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -357,7 +357,7 @@ physical_plan 02)--ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -418,7 +418,7 @@ physical_plan 01)DataSinkExec: sink=ParquetSink(file_groups=[]) 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, 
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index e1fd5eb726f1d..d2c013373d0c1 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -150,6 +150,21 @@ GROUP BY c1 2 2 3 3 +# This test goes through schema check in aggregate plan, if count's nullable is not matched, this test failed +query II rowsort +SELECT c1, SUM(c2) FROM ( + SELECT 1 as c1, 1::int as c2 + UNION + SELECT 2 as c1, 2::int as c2 + UNION + SELECT 3 as c1, count(1) as c2 +) as a +GROUP BY c1 +---- +1 1 +2 2 +3 1 + # union_all_with_count statement ok CREATE table t as SELECT 1 as a diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index af882c3a404a7..78055f8c1c11b 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1311,7 +1311,7 @@ logical_plan 05)--------TableScan: aggregate_test_100 projection=[c1, c2, c4] physical_plan 01)ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] -02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 03)----SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[true] 04)------CoalesceBatchesExec: target_batch_size=4096 05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -2558,10 +2558,10 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, 
sum3@2 as sum3, min1@3 as min1, min2@4 as min2, min3@5 as min3, max1@6 as max1, max2@7 as max2, max3@8 as max3, cnt1@9 as cnt1, cnt2@10 as cnt2, sumr1@11 as sumr1, sumr2@12 as sumr2, sumr3@13 as sumr3, minr1@14 as minr1, minr2@15 as minr2, minr3@16 as minr3, maxr1@17 as maxr1, maxr2@18 as maxr2, maxr3@19 as maxr3, cntr1@20 as cntr1, cntr2@21 as cntr2, sum4@22 as sum4, cnt3@23 as cnt3] 02)--SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] 03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS 
BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as max(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(*) ORDER 
BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: 
Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), 
is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", 
data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: 
Following(UInt64(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), is_causal: false }, count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"count(*) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] 08)--------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true @@ -2716,8 +2716,8 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as min2, max1@4 as max1, max2@5 as max2, count1@6 as count1, count2@7 as count2, avg1@8 as avg1, avg2@9 as avg2] 02)--SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, 
start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 
PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] 07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true @@ -2813,8 +2813,8 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] 02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] 03)----GlobalLimitExec: skip=0, fetch=5 -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER 
BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] 07)------------StreamingTableExec: partition_sizes=1, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST] @@ -2859,8 +2859,8 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2] 02)--ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2, ts@1 as ts] 03)----GlobalLimitExec: skip=0, fetch=5 -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: 
Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] 07)------------StreamingTableExec: partition_sizes=1, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST] @@ -4094,7 +4094,7 @@ logical_plan 04)------TableScan: a projection=[a] physical_plan 01)ProjectionExec: expr=[count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: 
Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 From 846befb6a620d3b8c0c7ff01be7c35c45fb72360 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 20 Aug 2024 19:53:11 -0600 Subject: [PATCH 350/357] minor: Remove unused create_row_hashes (#12083) * remove redundant loop * remove redundant loop * remove unused methods --- datafusion/common/src/hash_utils.rs | 65 ----------------------------- 1 file changed, 65 deletions(-) diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index f57ec0152e3fd..f3d2a0a4f9ab3 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use ahash::RandomState; use arrow::array::*; use arrow::datatypes::*; -use arrow::row::Rows; #[cfg(not(feature = "force_hash_collisions"))] use arrow::{downcast_dictionary_array, downcast_primitive_array}; use arrow_buffer::IntervalDayTime; @@ -363,38 +362,6 @@ pub fn create_hashes<'a>( Ok(hashes_buffer) } -/// Test version of `create_row_hashes` that produces the same value for -/// all hashes (to test collisions) -/// -/// See comments on `hashes_buffer` for more details -#[cfg(feature = "force_hash_collisions")] -pub fn create_row_hashes<'a>( - _rows: &[Vec], - _random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - Ok(hashes_buffer) -} - -/// Creates hash values for every row, based on their raw bytes. -#[cfg(not(feature = "force_hash_collisions"))] -pub fn create_row_hashes<'a>( - rows: &[Vec], - random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - *hash = random_state.hash_one(&rows[i]); - } - Ok(hashes_buffer) -} - /// Creates hash values for every row, based on the values in the /// columns. /// @@ -468,38 +435,6 @@ pub fn create_hashes<'a>( Ok(hashes_buffer) } -/// Test version of `create_row_hashes_v2` that produces the same value for -/// all hashes (to test collisions) -/// -/// See comments on `hashes_buffer` for more details -#[cfg(feature = "force_hash_collisions")] -pub fn create_row_hashes_v2<'a>( - _rows: &Rows, - _random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - Ok(hashes_buffer) -} - -/// Creates hash values for every row, based on their raw bytes. 
-#[cfg(not(feature = "force_hash_collisions"))] -pub fn create_row_hashes_v2<'a>( - rows: &Rows, - random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - *hash = random_state.hash_one(rows.row(i)); - } - Ok(hashes_buffer) -} - #[cfg(test)] mod tests { use std::sync::Arc; From 1c7209b280ba0d8a7faa6a4a63ff5bc52a6fd9bc Mon Sep 17 00:00:00 2001 From: Georgi Krastev Date: Wed, 21 Aug 2024 18:19:03 +0300 Subject: [PATCH 351/357] Use `schema_name` to create the `physical_name` (#11977) More consistency and less opportunity for column name mismatch. --- datafusion/core/src/physical_planner.rs | 13 +- datafusion/expr/src/expr.rs | 272 +----------------- .../src/aggregate.rs | 4 +- .../physical-plan/src/aggregates/mod.rs | 1 + 4 files changed, 17 insertions(+), 273 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6536f9a01439f..8d6c5089fa34d 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -73,8 +73,7 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, create_function_physical_name, physical_name, AggregateFunction, Alias, - GroupingSet, WindowFunction, + self, physical_name, AggregateFunction, Alias, GroupingSet, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -1569,12 +1568,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( let name = if let Some(name) = name { name } else { - create_function_physical_name( - func.name(), - *distinct, - args, - order_by.as_ref(), - )? + physical_name(e)? }; let physical_args = @@ -1588,8 +1582,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( None => None, }; - let ignore_nulls = null_treatment - .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) + let ignore_nulls = null_treatment.unwrap_or(NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_by) = { diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 88939ccf41b8c..85ba80396c8e8 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -38,8 +38,7 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, - TableReference, + plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, @@ -1082,7 +1081,7 @@ impl Expr { /// For example, for a projection (e.g. `SELECT `) the resulting arrow /// [`Schema`] will have a field with this name. /// - /// Note that the resulting string is subtlety different than the `Display` + /// Note that the resulting string is subtlety different from the `Display` /// representation for certain `Expr`. Some differences: /// /// 1. [`Expr::Alias`], which shows only the alias itself @@ -1104,6 +1103,7 @@ impl Expr { } /// Returns a full and complete string representation of this expression. + #[deprecated(note = "use format! 
instead")] pub fn canonical_name(&self) -> String { format!("{self}") } @@ -2386,263 +2386,13 @@ fn fmt_function( write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) } -pub fn create_function_physical_name( - fun: &str, - distinct: bool, - args: &[Expr], - order_by: Option<&Vec>, -) -> Result { - let names: Vec = args - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>()?; - - let distinct_str = match distinct { - true => "DISTINCT ", - false => "", - }; - - let phys_name = format!("{}({}{})", fun, distinct_str, names.join(",")); - - Ok(order_by - .map(|order_by| format!("{} ORDER BY [{}]", phys_name, expr_vec_fmt!(order_by))) - .unwrap_or(phys_name)) -} - -pub fn physical_name(e: &Expr) -> Result { - create_physical_name(e, true) -} - -fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { - match e { - Expr::Unnest(_) => { - internal_err!( - "Expr::Unnest should have been converted to LogicalPlan::Unnest" - ) - } - Expr::Column(c) => { - if is_first_expr { - Ok(c.name.clone()) - } else { - Ok(c.flat_name()) - } - } - Expr::Alias(Alias { name, .. }) => Ok(name.clone()), - Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), - Expr::Literal(value) => Ok(format!("{value:?}")), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let left = create_physical_name(left, false)?; - let right = create_physical_name(right, false)?; - Ok(format!("{left} {op} {right}")) - } - Expr::Case(case) => { - let mut name = "CASE ".to_string(); - if let Some(e) = &case.expr { - let _ = write!(name, "{} ", create_physical_name(e, false)?); - } - for (w, t) in &case.when_then_expr { - let _ = write!( - name, - "WHEN {} THEN {} ", - create_physical_name(w, false)?, - create_physical_name(t, false)? - ); - } - if let Some(e) = &case.else_expr { - let _ = write!(name, "ELSE {} ", create_physical_name(e, false)?); - } - name += "END"; - Ok(name) - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::TryCast(TryCast { expr, .. }) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::Not(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("NOT {expr}")) - } - Expr::Negative(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("(- {expr})")) - } - Expr::IsNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NULL")) - } - Expr::IsNotNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT NULL")) - } - Expr::IsTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS TRUE")) - } - Expr::IsFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS FALSE")) - } - Expr::IsUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS UNKNOWN")) - } - Expr::IsNotTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT TRUE")) - } - Expr::IsNotFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT FALSE")) - } - Expr::IsNotUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT UNKNOWN")) - } - Expr::ScalarFunction(fun) => fun.func.schema_name(&fun.args), - Expr::WindowFunction(WindowFunction { - fun, - args, - order_by, - .. 
- }) => { - create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter: _, - order_by, - null_treatment: _, - }) => { - create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => Ok(format!( - "ROLLUP ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::Cube(exprs) => Ok(format!( - "CUBE ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::GroupingSets(lists_of_exprs) => { - let mut strings = vec![]; - for exprs in lists_of_exprs { - let exprs_str = exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", "); - strings.push(format!("({exprs_str})")); - } - Ok(format!("GROUPING SETS ({})", strings.join(", "))) - } - }, - - Expr::InList(InList { - expr, - list, - negated, - }) => { - let expr = create_physical_name(expr, false)?; - let list = list.iter().map(|expr| create_physical_name(expr, false)); - if *negated { - Ok(format!("{expr} NOT IN ({list:?})")) - } else { - Ok(format!("{expr} IN ({list:?})")) - } - } - Expr::Exists { .. } => { - not_impl_err!("EXISTS is not yet supported in the physical plan") - } - Expr::InSubquery(_) => { - not_impl_err!("IN subquery is not yet supported in the physical plan") - } - Expr::ScalarSubquery(_) => { - not_impl_err!("Scalar subqueries are not yet supported in the physical plan") - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - let expr = create_physical_name(expr, false)?; - let low = create_physical_name(low, false)?; - let high = create_physical_name(high, false)?; - if *negated { - Ok(format!("{expr} NOT BETWEEN {low} AND {high}")) - } else { - Ok(format!("{expr} BETWEEN {low} AND {high}")) - } - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT {op_name} {pattern}{escape}")) - } else { - Ok(format!("{expr} {op_name} {pattern}{escape}")) - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT SIMILAR TO {pattern}{escape}")) - } else { - Ok(format!("{expr} SIMILAR TO {pattern}{escape}")) - } - } - Expr::Sort { .. } => { - internal_err!("Create physical name does not support sort expression") - } - Expr::Wildcard { qualifier, options } => match qualifier { - Some(qualifier) => Ok(format!("{}.*{}", qualifier, options)), - None => Ok(format!("*{}", options)), - }, - Expr::Placeholder(_) => { - internal_err!("Create physical name does not support placeholder") - } - Expr::OuterReferenceColumn(_, _) => { - internal_err!("Create physical name does not support OuterReferenceColumn") - } +/// The name of the column (field) that this `Expr` will produce in the physical plan. 
+/// The difference from [Expr::schema_name] is that top-level columns are unqualified. +pub fn physical_name(expr: &Expr) -> Result { + if let Expr::Column(col) = expr { + Ok(col.name.clone()) + } else { + Ok(expr.schema_name().to_string()) } } @@ -2658,6 +2408,7 @@ mod test { use std::any::Any; #[test] + #[allow(deprecated)] fn format_case_when() -> Result<()> { let expr = case(col("a")) .when(lit(1), lit(true)) @@ -2670,6 +2421,7 @@ mod test { } #[test] + #[allow(deprecated)] fn format_cast() -> Result<()> { let expr = Expr::Cast(Cast { expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)))), diff --git a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs index aa1d1999a3395..fd986e00a7ef3 100644 --- a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs +++ b/datafusion/physical-expr-functions-aggregate/src/aggregate.rs @@ -18,7 +18,6 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::ScalarValue; use datafusion_common::{internal_err, not_impl_err, Result}; -use datafusion_expr::expr::create_function_physical_name; use datafusion_expr::AggregateUDF; use datafusion_expr::ReversedUDAF; use datafusion_expr_common::accumulator::Accumulator; @@ -112,8 +111,7 @@ impl AggregateExprBuilder { let data_type = fun.return_type(&input_exprs_types)?; let is_nullable = fun.is_nullable(); let name = match alias { - // TODO: Ideally, we should build the name from physical expressions - None => create_function_physical_name(fun.name(), is_distinct, &[], None)?, + None => return internal_err!("alias should be provided"), Some(alias) => alias, }; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 89d4c452cca65..5aa255e7c341a 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -2179,6 +2179,7 @@ mod tests { .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) + .alias("a") .order_by(ordering_req.to_vec()) .schema(Arc::clone(&test_schema)) .build() From 78f58c80476ef2d2b10f4551230db4a610a9a32d Mon Sep 17 00:00:00 2001 From: JC <1950050+jc4x4@users.noreply.github.com> Date: Wed, 21 Aug 2024 23:21:52 +0800 Subject: [PATCH 352/357] Add new user doc to translate logical plan to physical plan (#12026) * Add new user doc to translate logical plan to physical plan https://github.com/apache/datafusion/issues/7306 * prettier * Run doc examples as part of cargo --doc * Update first example to run * Fix next example * fix last example * prettier * clarify table source * prettier * Revert changes --------- Co-authored-by: Andrew Lamb --- datafusion/core/src/lib.rs | 6 + datafusion/expr/src/logical_plan/mod.rs | 2 +- .../building-logical-plans.md | 187 ++++++++++++------ 3 files changed, 139 insertions(+), 56 deletions(-) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index daeb21db9d05c..735a381586ad1 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -678,6 +678,12 @@ doc_comment::doctest!( library_user_guide_sql_api ); +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/library-user-guide/building-logical-plans.md", + library_user_guide_logical_plans +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/library-user-guide/using-the-dataframe-api.md", diff --git 
a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index b58208591920b..5b5a842fa4cf8 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -26,7 +26,7 @@ pub mod tree_node; pub use builder::{ build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, - LogicalPlanBuilder, UNNAMED_TABLE, + LogicalPlanBuilder, LogicalTableSource, UNNAMED_TABLE, }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index fe922d8eaeb11..556deb02e9800 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -31,44 +31,52 @@ explained in more detail in the [Query Planning and Execution Overview] section DataFusion's [LogicalPlan] is an enum containing variants representing all the supported operators, and also contains an `Extension` variant that allows projects building on DataFusion to add custom logical operators. -It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as follows, but is is +It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as shown, but it is much easier to use the [LogicalPlanBuilder], which is described in the next section. Here is an example of building a logical plan directly: - - ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// create a TableScan plan -let projection = None; // optional projection -let filters = vec![]; // optional filters to push down -let fetch = None; // optional LIMIT -let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, -)?); - -// create a Filter plan that evaluates `id > 500` that wraps the TableScan -let filter_expr = col("id").gt(lit(500)); -let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{Filter, LogicalPlan, TableScan, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // create a TableScan plan + let projection = None; // optional projection + let filters = vec![]; // optional filters to push down + let fetch = None; // optional LIMIT + let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "person", + Arc::new(table_source), + projection, + filters, + fetch, + )? + ); + + // create a Filter plan that evaluates `id > 500` that wraps the TableScan + let filter_expr = col("id").gt(lit(500)); + let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan)) ? 
); + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` @@ -78,7 +86,7 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] DataFusion logical plans can be created using the [LogicalPlanBuilder] struct. There is also a [DataFrame] API which is a higher-level API that delegates to [LogicalPlanBuilder]. -The following associated functions can be used to create a new builder: +There are several functions that can can be used to create a new builder, such as - `empty` - create an empty plan with no fields - `values` - create a plan from a set of literal values @@ -102,41 +110,107 @@ The following example demonstrates building the same simple query plan as the pr ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// optional projection -let projection = None; - -// create a LogicalPlanBuilder for a table scan -let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - -// perform a filter operation and build the plan -let plan = builder - .filter(col("id").gt(lit(500)))? // WHERE id > 500 - .build()?; - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // optional projection + let projection = None; + + // create a LogicalPlanBuilder for a table scan + let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + + // perform a filter operation and build the plan + let plan = builder + .filter(col("id").gt(lit(500)))? // WHERE id > 500 + .build()?; + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` +## Translating Logical Plan to Physical Plan + +Logical plans can not be directly executed. They must be "compiled" into an +[`ExecutionPlan`], which is often referred to as a "physical plan". + +Compared to `LogicalPlan`s `ExecutionPlans` have many more details such as +specific algorithms and detailed optimizations compared to. 
Given a +`LogicalPlan` the easiest way to create an `ExecutionPlan` is using +[`SessionState::create_physical_plan`] as shown below + +```rust +use datafusion::datasource::{provider_as_source, MemTable}; +use datafusion::common::DataFusionError; +use datafusion::physical_plan::display::DisplayableExecutionPlan; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +// Creating physical plans may access remote catalogs and data sources +// thus it must be run with an async runtime. +#[tokio::main] +async fn main() -> Result<(), DataFusionError> { + + // create a default table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + // To create an ExecutionPlan we must provide an actual + // TableProvider. For this example, we don't provide any data + // but in production code, this would have `RecordBatch`es with + // in memory data + let table_provider = Arc::new(MemTable::try_new(Arc::new(schema), vec![])?); + // Use the provider_as_source function to convert the TableProvider to a table source + let table_source = provider_as_source(table_provider); + + // create a LogicalPlanBuilder for a table scan without projection or filters + let logical_plan = LogicalPlanBuilder::scan("person", table_source, None)?.build()?; + + // Now create the physical plan by calling `create_physical_plan` + let ctx = SessionContext::new(); + let physical_plan = ctx.state().create_physical_plan(&logical_plan).await?; + + // print the plan + println!("{}", DisplayableExecutionPlan::new(physical_plan.as_ref()).indent(true)); + Ok(()) +} +``` + +This example produces the following physical plan: + +```text +MemoryExec: partitions=0, partition_sizes=[] +``` + ## Table Sources -The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also -suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. However, if you -want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a -wrapper for a [TableProvider]. +The previous examples use a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. + +However, it is more common to use a [TableProvider]. To get a [TableSource] from a +[TableProvider], use [provider_as_source] or [DefaultTableSource]. [query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview [architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture @@ -145,5 +219,8 @@ wrapper for a [TableProvider]. 
[dataframe]: using-the-dataframe-api.md [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[provider_as_source]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/fn.provider_as_source.html [tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html [tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html +[`executionplan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html +[`sessionstate::create_physical_plan`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.create_physical_plan From 7eeac2f5c25c8bf606e463172916004b9d645da7 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Wed, 21 Aug 2024 08:23:28 -0700 Subject: [PATCH 353/357] Improve rpad udf by using a GenericStringBuilder (#12070) * Improve rpad udf by using a GenericStringBuilder * fix format * refine code --- datafusion/functions/benches/pad.rs | 11 +- datafusion/functions/src/unicode/rpad.rs | 333 ++++++++++++----------- 2 files changed, 180 insertions(+), 164 deletions(-) diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 5ff1e2fb860d4..0c496bc633477 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -127,11 +127,12 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) }); - // - // let args = create_args::(size, 32, true); - // group.bench_function(BenchmarkId::new("stringview type", size), |b| { - // b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) - // }); + + // rpad for stringview type + let args = create_args::(size, 32, true); + group.bench_function(BenchmarkId::new("stringview type", size), |b| { + b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + }); group.finish(); } diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 4bcf102c8793d..c1d6f327928f2 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -15,20 +15,23 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; -use std::sync::Arc; - -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; -use arrow::datatypes::DataType; -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, -}; -use unicode_segmentation::UnicodeSegmentation; - +use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; +use arrow::array::{ + ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, + OffsetSizeTrait, StringViewArray, +}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_int64_array; +use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::fmt::Write; +use std::sync::Arc; +use unicode_segmentation::UnicodeSegmentation; +use DataType::{LargeUtf8, Utf8, Utf8View}; #[derive(Debug)] pub struct RPadFunc { @@ -84,170 +87,182 @@ impl ScalarUDFImpl for RPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args.len() { - 2 => match args[0].data_type() { - DataType::Utf8 | DataType::Utf8View => { - make_scalar_function(rpad::, vec![])(args) - } - DataType::LargeUtf8 => { - make_scalar_function(rpad::, vec![])(args) - } - other => exec_err!("Unsupported data type {other:?} for function rpad"), - }, - 3 => match (args[0].data_type(), args[2].data_type()) { - ( - DataType::Utf8 | DataType::Utf8View, - DataType::Utf8 | DataType::Utf8View, - ) => make_scalar_function(rpad::, vec![])(args), - (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::LargeUtf8, DataType::Utf8View | DataType::Utf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::Utf8View | DataType::Utf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (first_type, last_type) => { - exec_err!("unsupported arguments type for rpad, first argument type is {}, last argument type is {}", first_type, last_type) - } - }, - number => { - exec_err!("unsupported arguments number {} for rpad", number) + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8 | Utf8View, _) => { + make_scalar_function(rpad::, vec![])(args) + } + (2, LargeUtf8, _) => make_scalar_function(rpad::, vec![])(args), + (3, Utf8 | Utf8View, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, Utf8 | Utf8View, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (_, _, _) => { + exec_err!("Unsupported combination of data types for function rpad") } } } } -macro_rules! 
process_rpad { - // For the two-argument case - ($string_array:expr, $length_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s = string.to_string(); - s.push_str(" ".repeat(length - graphemes.len()).as_str()); - Ok(Some(s)) - } - } - } - _ => Ok(None), - }) - .collect::>>() - }}; - - // For the three-argument case - ($string_array:expr, $length_array:expr, $fill_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .zip($fill_array.iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); +pub fn rpad( + args: &[ArrayRef], +) -> Result { + if args.len() < 2 || args.len() > 3 { + return exec_err!( + "rpad was called with {} arguments. It requires 2 or 3 arguments.", + args.len() + ); + } - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) - } else { - let mut s = string.to_string(); - let char_vector: Vec = (0..length - graphemes.len()) - .map(|l| fill_chars[l % fill_chars.len()]) - .collect(); - s.push_str(&char_vector.iter().collect::()); - Ok(Some(s)) - } - } - _ => Ok(None), - }) - .collect::>>() - }}; + let length_array = as_int64_array(&args[1])?; + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8View, _) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + None, + ) + } + (3, Utf8View, Some(Utf8View)) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string_view()), + ) + } + (3, Utf8View, Some(Utf8 | LargeUtf8)) => { + rpad_impl::<&StringViewArray, &GenericStringArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string::()), + ) + } + (3, Utf8 | LargeUtf8, Some(Utf8View)) => rpad_impl::< + &GenericStringArray, + &StringViewArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + Some(args[2].as_string_view()), + ), + (_, _, _) => rpad_impl::< + &GenericStringArray, + &GenericStringArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + args.get(2).map(|arg| arg.as_string::()), + ), + } } /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. 
/// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad( - args: &[ArrayRef], -) -> Result { - match (args.len(), args[0].data_type()) { - (2, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; +pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( + string_array: StringArrType, + length_array: &Int64Array, + fill_array: Option, +) -> Result +where + StringArrType: StringArrayType<'a>, + FillArrType: StringArrayType<'a>, + StringArrayLen: OffsetSizeTrait, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) + match fill_array { + None => { + string_array.iter().zip(length_array.iter()).try_for_each( + |(string, length)| -> Result<(), DataFusionError> { + match (string, length) { + (Some(string), Some(length)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + builder.append_value(""); + } else { + let graphemes = + string.graphemes(true).collect::>(); + if length < graphemes.len() { + builder.append_value(graphemes[..length].concat()); + } else { + builder.write_str(string)?; + builder.write_str( + &" ".repeat(length - graphemes.len()), + )?; + builder.append_value(""); + } + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (2, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; + Some(fill_array) => { + string_array + .iter() + .zip(length_array.iter()) + .zip(fill_array.iter()) + .try_for_each( + |((string, length), fill)| -> Result<(), DataFusionError> { + match (string, length, fill) { + (Some(string), Some(length), Some(fill)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + let graphemes = + string.graphemes(true).collect::>(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) - } - (3, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } - } - (3, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } + if length < graphemes.len() { + 
builder.append_value(graphemes[..length].concat()); + } else if fill.is_empty() { + builder.append_value(string); + } else { + builder.write_str(string)?; + fill.chars() + .cycle() + .take(length - graphemes.len()) + .for_each(|ch| builder.write_char(ch).unwrap()); + builder.append_value(""); + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (other, other_type) => exec_err!( - "rpad requires 2 or 3 arguments with corresponding types, but got {}. number of arguments with {}", - other, other_type - ), } + + Ok(Arc::new(builder.finish()) as ArrayRef) } #[cfg(test)] From 9d076bde4fb099329abf33f57e319fd47c523561 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Wed, 21 Aug 2024 23:23:49 +0800 Subject: [PATCH 354/357] fix: Panic non-integer for the second argument of `nth_value` function (#12076) * fix: Panic non-integer for nth_value function * chore: Display actual value * Update datafusion/physical-plan/src/windows/mod.rs Co-authored-by: Marco Neumann * chore --------- Co-authored-by: Marco Neumann --- datafusion/physical-plan/src/windows/mod.rs | 8 ++++++-- datafusion/sqllogictest/test_files/window.slt | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index f938f4410a992..63f4ffcfaacc2 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -30,7 +30,9 @@ use crate::{ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{ + exec_datafusion_err, exec_err, DataFusionError, Result, ScalarValue, +}; use datafusion_expr::{ BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, WindowUDF, @@ -284,7 +286,9 @@ fn create_built_in_window_expr( args[1] .as_any() .downcast_ref::() - .unwrap() + .ok_or_else(|| { + exec_datafusion_err!("Expected a signed integer literal for the second argument of nth_value, got {}", args[1]) + })? 
.value() .clone(), )?; diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 78055f8c1c11b..5bf5cf83284f6 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4861,3 +4861,16 @@ select a, row_number(a) over (order by b) as rn from t; statement ok drop table t; + +statement ok +DROP TABLE t1; + +# https://github.com/apache/datafusion/issues/12073 +statement ok +CREATE TABLE t1(v1 BIGINT); + +query error DataFusion error: Execution error: Expected a signed integer literal for the second argument of nth_value, got v1@0 +SELECT NTH_VALUE('+Inf'::Double, v1) OVER (PARTITION BY v1) FROM t1; + +statement ok +DROP TABLE t1; \ No newline at end of file From eca71c4cb144795d1c073cbf918e94bd9d0e4102 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 21 Aug 2024 11:24:03 -0400 Subject: [PATCH 355/357] Remove vestigal `datafusion-docs` module compilation (#12081) * Remove vestigal `datafusion-docs` module compilation * fix build --- Cargo.toml | 1 - docs/Cargo.toml | 35 -------------- docs/src/lib.rs | 19 -------- docs/src/library_logical_plan.rs | 78 -------------------------------- 4 files changed, 133 deletions(-) delete mode 100644 docs/Cargo.toml delete mode 100644 docs/src/lib.rs delete mode 100644 docs/src/library_logical_plan.rs diff --git a/Cargo.toml b/Cargo.toml index ae344a46a1bd3..d82443f5d1c8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,6 @@ members = [ "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", - "docs", "test-utils", "benchmarks", ] diff --git a/docs/Cargo.toml b/docs/Cargo.toml deleted file mode 100644 index 14398c8415791..0000000000000 --- a/docs/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-docs-tests" -description = "DataFusion Documentation Tests" -publish = false -version = { workspace = true } -edition = { workspace = true } -readme = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -license = { workspace = true } -authors = { workspace = true } -rust-version = { workspace = true } - -[lints] -workspace = true - -[dependencies] -datafusion = { workspace = true } diff --git a/docs/src/lib.rs b/docs/src/lib.rs deleted file mode 100644 index f73132468ec9e..0000000000000 --- a/docs/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[cfg(test)] -mod library_logical_plan; diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs deleted file mode 100644 index 3550039415706..0000000000000 --- a/docs/src/library_logical_plan.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::error::Result; -use datafusion::logical_expr::builder::LogicalTableSource; -use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, TableScan}; -use datafusion::prelude::*; -use std::sync::Arc; - -#[test] -fn plan_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // create a TableScan plan - let projection = None; // optional projection - let filters = vec![]; // optional filters to push down - let fetch = None; // optional LIMIT - let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, - )?); - - // create a Filter plan that evaluates `id > 500` and wraps the TableScan - let filter_expr = col("id").gt(lit(500)); - let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -} - -#[test] -fn plan_builder_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // optional projection - let projection = None; - - // create a LogicalPlanBuilder for a table scan - let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - - // perform a filter that evaluates `id > 500`, and build the plan - let plan = builder.filter(col("id").gt(lit(500)))?.build()?; - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -} From 
ad583a8dfa1cdf269c7a52eb4cb030d65d370a4c Mon Sep 17 00:00:00 2001 From: HuSen Date: Thu, 22 Aug 2024 01:03:09 +0800 Subject: [PATCH 356/357] Add test to verify count aggregate function should not be nullable (#12100) --- datafusion/sqllogictest/test_files/aggregate.slt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index b8b93b28aff61..d39bf6538ecbc 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -5387,6 +5387,18 @@ physical_plan statement ok DROP TABLE empty; +# verify count aggregate function should not be nullable +statement ok +create table empty; + +query I +select distinct count() from empty; +---- +0 + +statement ok +DROP TABLE empty; + statement ok CREATE TABLE t(col0 INTEGER) as VALUES(2); From 121f330a6ccca008da4bf6ffc4efa4ffbf961fd7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 21 Aug 2024 13:12:37 -0400 Subject: [PATCH 357/357] Minor: Extract `BatchCoalescer` to its own module (#12047) --- datafusion/physical-plan/src/coalesce/mod.rs | 588 ++++++++++++++++++ .../physical-plan/src/coalesce_batches.rs | 546 +--------------- datafusion/physical-plan/src/lib.rs | 1 + 3 files changed, 593 insertions(+), 542 deletions(-) create mode 100644 datafusion/physical-plan/src/coalesce/mod.rs diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs new file mode 100644 index 0000000000000..5befa5ecda99b --- /dev/null +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -0,0 +1,588 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::compute::concat_batches; +use arrow_array::builder::StringViewBuilder; +use arrow_array::cast::AsArray; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use arrow_schema::SchemaRef; +use std::sync::Arc; + +/// Concatenate multiple [`RecordBatch`]es +/// +/// `BatchCoalescer` concatenates multiple small [`RecordBatch`]es, produced by +/// operations such as `FilterExec` and `RepartitionExec`, into larger ones for +/// more efficient processing by subsequent operations. +/// +/// # Background +/// +/// Generally speaking, larger [`RecordBatch`]es are more efficient to process +/// than smaller record batches (until the CPU cache is exceeded) because there +/// is fixed processing overhead per batch. 
DataFusion tries to operate on +/// batches of `target_batch_size` rows to amortize this overhead +/// +/// ```text +/// ┌────────────────────┐ +/// │ RecordBatch │ +/// │ num_rows = 23 │ +/// └────────────────────┘ ┌────────────────────┐ +/// │ │ +/// ┌────────────────────┐ Coalesce │ │ +/// │ │ Batches │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ +/// │ │ │ RecordBatch │ +/// │ │ │ num_rows = 106 │ +/// └────────────────────┘ │ │ +/// │ │ +/// ┌────────────────────┐ │ │ +/// │ │ │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 33 │ └────────────────────┘ +/// │ │ +/// └────────────────────┘ +/// ``` +/// +/// # Notes: +/// +/// 1. Output rows are produced in the same order as the input rows +/// +/// 2. The output is a sequence of batches, with all but the last being at least +/// `target_batch_size` rows. +/// +/// 3. Eventually this may also be able to handle other optimizations such as a +/// combined filter/coalesce operation. +/// +#[derive(Debug)] +pub struct BatchCoalescer { + /// The input schema + schema: SchemaRef, + /// Minimum number of rows for coalesces batches + target_batch_size: usize, + /// Total number of rows returned so far + total_rows: usize, + /// Buffered batches + buffer: Vec, + /// Buffered row count + buffered_rows: usize, + /// Limit: maximum number of rows to fetch, `None` means fetch all rows + fetch: Option, +} + +impl BatchCoalescer { + /// Create a new `BatchCoalescer` + /// + /// # Arguments + /// - `schema` - the schema of the output batches + /// - `target_batch_size` - the minimum number of rows for each + /// output batch (until limit reached) + /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows + pub fn new( + schema: SchemaRef, + target_batch_size: usize, + fetch: Option, + ) -> Self { + Self { + schema, + target_batch_size, + total_rows: 0, + buffer: vec![], + buffered_rows: 0, + fetch, + } + } + + /// Return the schema of the output batches + pub fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + /// Push next batch, and returns [`CoalescerState`] indicating the current + /// state of the buffer. + pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { + let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } + + /// Return true if the there is no data buffered + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Checks if the buffer will reach the specified limit after getting + /// `batch`. + /// + /// If fetch would be exceeded, slices the received batch, updates the + /// buffer with it, and returns `true`. + /// + /// Otherwise: does nothing and returns `false`. + fn limit_reached(&mut self, batch: &RecordBatch) -> bool { + match self.fetch { + Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // Limit is reached + let remaining_rows = fetch - self.total_rows; + debug_assert!(remaining_rows > 0); + + let batch = batch.slice(0, remaining_rows); + self.buffered_rows += batch.num_rows(); + self.total_rows = fetch; + self.buffer.push(batch); + true + } + _ => false, + } + } + + /// Updates the buffer with the given batch. + /// + /// If the target batch size is reached, returns `true`. Otherwise, returns + /// `false`. 
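As an aside, the following is a minimal sketch of how a caller is expected to drive this API, mirroring the loop in `CoalesceBatchesStream`. The function name `coalesce_all` and the 8192-row target are arbitrary example values, and the import path assumes the module is consumed through the `pub mod coalesce` export added at the end of this patch.

use arrow_array::RecordBatch;
use arrow_schema::SchemaRef;
use datafusion_common::Result;
use datafusion_physical_plan::coalesce::{BatchCoalescer, CoalescerState};

fn coalesce_all(
    schema: SchemaRef,
    input: impl IntoIterator<Item = RecordBatch>,
) -> Result<Vec<RecordBatch>> {
    // buffer until roughly 8192 rows are collected; no fetch limit
    let mut coalescer = BatchCoalescer::new(schema, 8192, None);
    let mut output = vec![];
    for batch in input {
        match coalescer.push_batch(batch) {
            // not enough rows yet: keep buffering
            CoalescerState::Continue => {}
            // fetch limit reached: emit the buffered rows and stop
            CoalescerState::LimitReached => {
                output.push(coalescer.finish_batch()?);
                return Ok(output);
            }
            // target reached: emit one large batch and keep going
            CoalescerState::TargetReached => output.push(coalescer.finish_batch()?),
        }
    }
    // flush any leftover rows smaller than the target
    if !coalescer.is_empty() {
        output.push(coalescer.finish_batch()?);
    }
    Ok(output)
}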
+    fn target_reached(&mut self, batch: RecordBatch) -> bool {
+        if batch.num_rows() == 0 {
+            false
+        } else {
+            self.total_rows += batch.num_rows();
+            self.buffered_rows += batch.num_rows();
+            self.buffer.push(batch);
+            self.buffered_rows >= self.target_batch_size
+        }
+    }
+
+    /// Concatenates and returns all buffered batches, and clears the buffer.
+    pub fn finish_batch(&mut self) -> datafusion_common::Result<RecordBatch> {
+        let batch = concat_batches(&self.schema, &self.buffer)?;
+        self.buffer.clear();
+        self.buffered_rows = 0;
+        Ok(batch)
+    }
+}
+
+/// Indicates the state of the [`BatchCoalescer`] buffer after the
+/// [`BatchCoalescer::push_batch()`] operation.
+///
+/// The caller should take different actions, depending on the variant returned.
+pub enum CoalescerState {
+    /// Neither the limit nor the target batch size is reached.
+    ///
+    /// Action: continue pushing batches.
+    Continue,
+    /// The limit has been reached.
+    ///
+    /// Action: call [`BatchCoalescer::finish_batch()`] to get the final
+    /// buffered results as a batch and finish the query.
+    LimitReached,
+    /// The specified minimum number of rows a batch should have is reached.
+    ///
+    /// Action: call [`BatchCoalescer::finish_batch()`] to get the current
+    /// buffered results as a batch and then continue pushing batches.
+    TargetReached,
+}
+
+/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed
+///
+/// Decides when to consolidate the StringView into a new buffer to reduce
+/// memory usage and improve string locality for better performance.
+///
+/// This differs from `StringViewArray::gc` because:
+/// 1. It may not compact the array depending on a heuristic.
+/// 2. It uses a precise block size to reduce the number of buffers to track.
+///
+/// # Heuristic
+///
+/// If the average size of each view is larger than 32 bytes, we compact the array.
+///
+/// `StringViewArray` includes pointers to buffers that hold the underlying data.
+/// One of the great benefits of `StringViewArray` is that many operations
+/// (e.g., `filter`) can be done without copying the underlying data.
+///
+/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the
+/// `StringViewArray` may only refer to a small portion of the buffer,
+/// significantly increasing memory usage.
+fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
+    let new_columns: Vec<ArrayRef> = batch
+        .columns()
+        .iter()
+        .map(|c| {
+            // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long.
+            let Some(s) = c.as_string_view_opt() else {
+                return Arc::clone(c);
+            };
+            let ideal_buffer_size: usize = s
+                .views()
+                .iter()
+                .map(|v| {
+                    let len = (*v as u32) as usize;
+                    if len > 12 {
+                        len
+                    } else {
+                        0
+                    }
+                })
+                .sum();
+            let actual_buffer_size = s.get_buffer_memory_size();
+
+            // Re-creating the array copies data and can be time consuming.
+            // We only do it if the array is sparse
+            if actual_buffer_size > (ideal_buffer_size * 2) {
+                // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerates later concat_batches.
+                // See https://github.com/apache/arrow-rs/issues/6094 for more details.
+ let mut builder = StringViewBuilder::with_capacity(s.len()); + if ideal_buffer_size > 0 { + builder = builder.with_block_size(ideal_buffer_size as u32); + } + + for v in s.iter() { + builder.append_option(v); + } + + let gc_string = builder.finish(); + + debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 + + Arc::new(gc_string) + } else { + Arc::clone(c) + } + }) + .collect(); + RecordBatch::try_new(batch.schema(), new_columns) + .expect("Failed to re-create the gc'ed record batch") +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use super::*; + + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::builder::ArrayBuilder; + use arrow_array::{StringViewArray, UInt32Array}; + + #[test] + fn test_coalesce() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // expected output is batches of at least 20 rows (except for the final batch) + .with_target_batch_size(21) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run() + } + + #[test] + fn test_coalesce_with_fetch_larger_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 + // expected to behave the same as `test_concat_batches` + .with_target_batch_size(21) + .with_fetch(Some(100)) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 + .with_target_batch_size(21) + .with_fetch(Some(50)) + .with_expected_output_sizes(vec![24, 24, 2]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 + .with_target_batch_size(21) + .with_fetch(Some(48)) + .with_expected_output_sizes(vec![24, 24]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_target_batch_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 + .with_target_batch_size(21) + .with_fetch(Some(10)) + .with_expected_output_sizes(vec![10]) + .run(); + } + + #[test] + fn test_coalesce_single_large_batch_over_fetch() { + let large_batch = uint32_batch(0..100); + Test::new() + .with_batch(large_batch) + .with_target_batch_size(20) + .with_fetch(Some(7)) + .with_expected_output_sizes(vec![7]) + .run() + } + + /// Test for [`BatchCoalescer`] + /// + /// Pushes the input batches to the coalescer and verifies that the resulting + /// batches have the expected number of rows and contents. + #[derive(Debug, Clone, Default)] + struct Test { + /// Batches to feed to the coalescer. 
Tests must have at least one + /// schema + input_batches: Vec, + /// Expected output sizes of the resulting batches + expected_output_sizes: Vec, + /// target batch size + target_batch_size: usize, + /// Fetch (limit) + fetch: Option, + } + + impl Test { + fn new() -> Self { + Self::default() + } + + /// Set the target batch size + fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { + self.target_batch_size = target_batch_size; + self + } + + /// Set the fetch (limit) + fn with_fetch(mut self, fetch: Option) -> Self { + self.fetch = fetch; + self + } + + /// Extend the input batches with `batch` + fn with_batch(mut self, batch: RecordBatch) -> Self { + self.input_batches.push(batch); + self + } + + /// Extends the input batches with `batches` + fn with_batches( + mut self, + batches: impl IntoIterator, + ) -> Self { + self.input_batches.extend(batches); + self + } + + /// Extends `sizes` to expected output sizes + fn with_expected_output_sizes( + mut self, + sizes: impl IntoIterator, + ) -> Self { + self.expected_output_sizes.extend(sizes); + self + } + + /// Runs the test -- see documentation on [`Test`] for details + fn run(self) { + let Self { + input_batches, + target_batch_size, + fetch, + expected_output_sizes, + } = self; + + let schema = input_batches[0].schema(); + + // create a single large input batch for output comparison + let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); + + let mut coalescer = + BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); + + let mut output_batches = vec![]; + for batch in input_batches { + match coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + output_batches.push(coalescer.finish_batch().unwrap()); + break; + } + CoalescerState::TargetReached => { + coalescer.buffered_rows = 0; + output_batches.push(coalescer.finish_batch().unwrap()); + } + } + } + if coalescer.buffered_rows != 0 { + output_batches.extend(coalescer.buffer); + } + + // make sure we got the expected number of output batches and content + let mut starting_idx = 0; + assert_eq!(expected_output_sizes.len(), output_batches.len()); + for (i, (expected_size, batch)) in + expected_output_sizes.iter().zip(output_batches).enumerate() + { + assert_eq!( + *expected_size, + batch.num_rows(), + "Unexpected number of rows in Batch {i}" + ); + + // compare the contents of the batch (using `==` compares the + // underlying memory layout too) + let expected_batch = + single_input_batch.slice(starting_idx, *expected_size); + let batch_strings = batch_to_pretty_strings(&batch); + let expected_batch_strings = batch_to_pretty_strings(&expected_batch); + let batch_strings = batch_strings.lines().collect::>(); + let expected_batch_strings = + expected_batch_strings.lines().collect::>(); + assert_eq!( + expected_batch_strings, batch_strings, + "Unexpected content in Batch {i}:\ + \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" + ); + starting_idx += *expected_size; + } + } + } + + /// Return a batch of UInt32 with the specified range + fn uint32_batch(range: Range) -> RecordBatch { + let schema = + Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); + + RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(UInt32Array::from_iter_values(range))], + ) + .unwrap() + } + + #[test] + fn test_gc_string_view_batch_small_no_compact() { + // view with only short strings (no buffers) --> no need to compact + let array = StringViewTest { + rows: 1000, + 
strings: vec![Some("a"), Some("b"), Some("c")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 0); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_no_compact() { + // view with large strings (has buffers) but full --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("This string is longer than 12 bytes")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_slice_compact() { + // view with large strings (has buffers) and only partially used --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("this string is longer than 12 bytes")], + } + .build(); + + // slice only 11 rows, so most of the buffer is not used + let array = array.slice(11, 22); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer + } + + /// Compares the values of two string view arrays + fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { + assert_eq!(arr1.len(), arr2.len()); + for (s1, s2) in arr1.iter().zip(arr2.iter()) { + assert_eq!(s1, s2); + } + } + + /// runs garbage collection on string view array + /// and ensures the number of rows are the same + fn do_gc(array: StringViewArray) -> StringViewArray { + let batch = + RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); + let gc_batch = gc_string_view_batch(&batch); + assert_eq!(batch.num_rows(), gc_batch.num_rows()); + assert_eq!(batch.schema(), gc_batch.schema()); + gc_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .clone() + } + + /// Describes parameters for creating a `StringViewArray` + struct StringViewTest { + /// The number of rows in the array + rows: usize, + /// The strings to use in the array (repeated over and over + strings: Vec>, + } + + impl StringViewTest { + /// Create a `StringViewArray` with the parameters specified in this struct + fn build(self) -> StringViewArray { + let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); + loop { + for &v in self.strings.iter() { + builder.append_option(v); + if builder.len() >= self.rows { + return builder.finish(); + } + } + } + } + } + fn batch_to_pretty_strings(batch: &RecordBatch) -> String { + arrow::util::pretty::pretty_format_batches(&[batch.clone()]) + .unwrap() + .to_string() + } +} diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 5589027694fe4..7caf5b8ab65a3 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -28,19 +28,17 @@ use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; -use arrow::array::{AsArray, StringViewBuilder}; -use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use arrow_array::{Array, ArrayRef}; use datafusion_common::Result; use datafusion_execution::TaskContext; +use 
crate::coalesce::{BatchCoalescer, CoalescerState}; use futures::ready; use futures::stream::{Stream, StreamExt}; /// `CoalesceBatchesExec` combines small batches into larger batches for more -/// efficient use of vectorized processing by later operators. +/// efficient vectorized processing by later operators. /// /// The operator buffers batches until it collects `target_batch_size` rows and /// then emits a single concatenated batch. When only a limited number of rows @@ -48,35 +46,7 @@ use futures::stream::{Stream, StreamExt}; /// buffering and returns the final batch once the number of collected rows /// reaches the `fetch` value. /// -/// # Background -/// -/// Generally speaking, larger RecordBatches are more efficient to process than -/// smaller record batches (until the CPU cache is exceeded) because there is -/// fixed processing overhead per batch. This code concatenates multiple small -/// record batches into larger ones to amortize this overhead. -/// -/// ```text -/// ┌────────────────────┐ -/// │ RecordBatch │ -/// │ num_rows = 23 │ -/// └────────────────────┘ ┌────────────────────┐ -/// │ │ -/// ┌────────────────────┐ Coalesce │ │ -/// │ │ Batches │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ -/// │ │ │ RecordBatch │ -/// │ │ │ num_rows = 106 │ -/// └────────────────────┘ │ │ -/// │ │ -/// ┌────────────────────┐ │ │ -/// │ │ │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 33 │ └────────────────────┘ -/// │ │ -/// └────────────────────┘ -/// ``` - +/// See [`BatchCoalescer`] for more information #[derive(Debug)] pub struct CoalesceBatchesExec { /// The input plan @@ -346,7 +316,7 @@ impl CoalesceBatchesStream { } CoalesceBatchesStreamState::Exhausted => { // Handle the end of the input stream. - return if self.coalescer.buffer.is_empty() { + return if self.coalescer.is_empty() { // If buffer is empty, return None indicating the stream is fully consumed. Poll::Ready(None) } else { @@ -365,511 +335,3 @@ impl RecordBatchStream for CoalesceBatchesStream { self.coalescer.schema() } } - -/// Concatenate multiple record batches into larger batches -/// -/// See [`CoalesceBatchesExec`] for more details. -/// -/// Notes: -/// -/// 1. The output rows is the same order as the input rows -/// -/// 2. The output is a sequence of batches, with all but the last being at least -/// `target_batch_size` rows. -/// -/// 3. Eventually this may also be able to handle other optimizations such as a -/// combined filter/coalesce operation. 
-#[derive(Debug)] -struct BatchCoalescer { - /// The input schema - schema: SchemaRef, - /// Minimum number of rows for coalesces batches - target_batch_size: usize, - /// Total number of rows returned so far - total_rows: usize, - /// Buffered batches - buffer: Vec, - /// Buffered row count - buffered_rows: usize, - /// Maximum number of rows to fetch, `None` means fetching all rows - fetch: Option, -} - -impl BatchCoalescer { - /// Create a new `BatchCoalescer` - /// - /// # Arguments - /// - `schema` - the schema of the output batches - /// - `target_batch_size` - the minimum number of rows for each - /// output batch (until limit reached) - /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows - fn new(schema: SchemaRef, target_batch_size: usize, fetch: Option) -> Self { - Self { - schema, - target_batch_size, - total_rows: 0, - buffer: vec![], - buffered_rows: 0, - fetch, - } - } - - /// Return the schema of the output batches - fn schema(&self) -> SchemaRef { - Arc::clone(&self.schema) - } - - /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns - /// a variant of [`CoalescerState`] indicating the final state of the buffer. - fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { - let batch = gc_string_view_batch(&batch); - if self.limit_reached(&batch) { - CoalescerState::LimitReached - } else if self.target_reached(batch) { - CoalescerState::TargetReached - } else { - CoalescerState::Continue - } - } - - /// The function checks if the buffer can reach the specified limit after getting `batch`. - /// If it does, it slices the received batch as needed, updates the buffer with it, and - /// finally returns `true`. Otherwise; the function does nothing and returns `false`. - fn limit_reached(&mut self, batch: &RecordBatch) -> bool { - match self.fetch { - Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { - // Limit is reached - let remaining_rows = fetch - self.total_rows; - debug_assert!(remaining_rows > 0); - - let batch = batch.slice(0, remaining_rows); - self.buffered_rows += batch.num_rows(); - self.total_rows = fetch; - self.buffer.push(batch); - true - } - _ => false, - } - } - - /// Updates the buffer with the given batch. If the target batch size is reached, - /// the function returns `true`. Otherwise, it returns `false`. - fn target_reached(&mut self, batch: RecordBatch) -> bool { - if batch.num_rows() == 0 { - false - } else { - self.total_rows += batch.num_rows(); - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - self.buffered_rows >= self.target_batch_size - } - } - - /// Concatenates and returns all buffered batches, and clears the buffer. - fn finish_batch(&mut self) -> Result { - let batch = concat_batches(&self.schema, &self.buffer)?; - self.buffer.clear(); - self.buffered_rows = 0; - Ok(batch) - } -} - -/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a -/// [`BatchCoalescer::push_batch()`] operation. -enum CoalescerState { - /// Neither the limit nor the target batch size is reached. - Continue, - /// The sufficient row count to produce a complete query result is reached. - LimitReached, - /// The specified minimum number of rows a batch should have is reached. - TargetReached, -} - -/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed -/// -/// This function decides when to consolidate the StringView into a new buffer -/// to reduce memory usage and improve string locality for better performance. 
-/// -/// This differs from `StringViewArray::gc` because: -/// 1. It may not compact the array depending on a heuristic. -/// 2. It uses a precise block size to reduce the number of buffers to track. -/// -/// # Heuristic -/// -/// If the average size of each view is larger than 32 bytes, we compact the array. -/// -/// `StringViewArray` include pointers to buffer that hold the underlying data. -/// One of the great benefits of `StringViewArray` is that many operations -/// (e.g., `filter`) can be done without copying the underlying data. -/// -/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the -/// `StringViewArray` may only refer to a small portion of the buffer, -/// significantly increasing memory usage. -fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { - let new_columns: Vec = batch - .columns() - .iter() - .map(|c| { - // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. - let Some(s) = c.as_string_view_opt() else { - return Arc::clone(c); - }; - let ideal_buffer_size: usize = s - .views() - .iter() - .map(|v| { - let len = (*v as u32) as usize; - if len > 12 { - len - } else { - 0 - } - }) - .sum(); - let actual_buffer_size = s.get_buffer_memory_size(); - - // Re-creating the array copies data and can be time consuming. - // We only do it if the array is sparse - if actual_buffer_size > (ideal_buffer_size * 2) { - // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. - // See https://github.com/apache/arrow-rs/issues/6094 for more details. - let mut builder = StringViewBuilder::with_capacity(s.len()); - if ideal_buffer_size > 0 { - builder = builder.with_block_size(ideal_buffer_size as u32); - } - - for v in s.iter() { - builder.append_option(v); - } - - let gc_string = builder.finish(); - - debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 - - Arc::new(gc_string) - } else { - Arc::clone(c) - } - }) - .collect(); - RecordBatch::try_new(batch.schema(), new_columns) - .expect("Failed to re-create the gc'ed record batch") -} - -#[cfg(test)] -mod tests { - use std::ops::Range; - - use super::*; - - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_array::builder::ArrayBuilder; - use arrow_array::{StringViewArray, UInt32Array}; - - #[test] - fn test_coalesce() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // expected output is batches of at least 20 rows (except for the final batch) - .with_target_batch_size(21) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run() - } - - #[test] - fn test_coalesce_with_fetch_larger_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 - // expected to behave the same as `test_concat_batches` - .with_target_batch_size(21) - .with_fetch(Some(100)) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 - .with_target_batch_size(21) - .with_fetch(Some(50)) - .with_expected_output_sizes(vec![24, 24, 2]) - .run(); - } - - #[test] - fn 
test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 - .with_target_batch_size(21) - .with_fetch(Some(48)) - .with_expected_output_sizes(vec![24, 24]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_target_batch_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 - .with_target_batch_size(21) - .with_fetch(Some(10)) - .with_expected_output_sizes(vec![10]) - .run(); - } - - #[test] - fn test_coalesce_single_large_batch_over_fetch() { - let large_batch = uint32_batch(0..100); - Test::new() - .with_batch(large_batch) - .with_target_batch_size(20) - .with_fetch(Some(7)) - .with_expected_output_sizes(vec![7]) - .run() - } - - /// Test for [`BatchCoalescer`] - /// - /// Pushes the input batches to the coalescer and verifies that the resulting - /// batches have the expected number of rows and contents. - #[derive(Debug, Clone, Default)] - struct Test { - /// Batches to feed to the coalescer. Tests must have at least one - /// schema - input_batches: Vec, - /// Expected output sizes of the resulting batches - expected_output_sizes: Vec, - /// target batch size - target_batch_size: usize, - /// Fetch (limit) - fetch: Option, - } - - impl Test { - fn new() -> Self { - Self::default() - } - - /// Set the target batch size - fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { - self.target_batch_size = target_batch_size; - self - } - - /// Set the fetch (limit) - fn with_fetch(mut self, fetch: Option) -> Self { - self.fetch = fetch; - self - } - - /// Extend the input batches with `batch` - fn with_batch(mut self, batch: RecordBatch) -> Self { - self.input_batches.push(batch); - self - } - - /// Extends the input batches with `batches` - fn with_batches( - mut self, - batches: impl IntoIterator, - ) -> Self { - self.input_batches.extend(batches); - self - } - - /// Extends `sizes` to expected output sizes - fn with_expected_output_sizes( - mut self, - sizes: impl IntoIterator, - ) -> Self { - self.expected_output_sizes.extend(sizes); - self - } - - /// Runs the test -- see documentation on [`Test`] for details - fn run(self) { - let Self { - input_batches, - target_batch_size, - fetch, - expected_output_sizes, - } = self; - - let schema = input_batches[0].schema(); - - // create a single large input batch for output comparison - let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); - - let mut coalescer = - BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); - - let mut output_batches = vec![]; - for batch in input_batches { - match coalescer.push_batch(batch) { - CoalescerState::Continue => {} - CoalescerState::LimitReached => { - output_batches.push(coalescer.finish_batch().unwrap()); - break; - } - CoalescerState::TargetReached => { - coalescer.buffered_rows = 0; - output_batches.push(coalescer.finish_batch().unwrap()); - } - } - } - if coalescer.buffered_rows != 0 { - output_batches.extend(coalescer.buffer); - } - - // make sure we got the expected number of output batches and content - let mut starting_idx = 0; - assert_eq!(expected_output_sizes.len(), output_batches.len()); - for (i, (expected_size, batch)) in - expected_output_sizes.iter().zip(output_batches).enumerate() - { - assert_eq!( - *expected_size, - batch.num_rows(), 
- "Unexpected number of rows in Batch {i}" - ); - - // compare the contents of the batch (using `==` compares the - // underlying memory layout too) - let expected_batch = - single_input_batch.slice(starting_idx, *expected_size); - let batch_strings = batch_to_pretty_strings(&batch); - let expected_batch_strings = batch_to_pretty_strings(&expected_batch); - let batch_strings = batch_strings.lines().collect::>(); - let expected_batch_strings = - expected_batch_strings.lines().collect::>(); - assert_eq!( - expected_batch_strings, batch_strings, - "Unexpected content in Batch {i}:\ - \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" - ); - starting_idx += *expected_size; - } - } - } - - /// Return a batch of UInt32 with the specified range - fn uint32_batch(range: Range) -> RecordBatch { - let schema = - Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); - - RecordBatch::try_new( - Arc::clone(&schema), - vec![Arc::new(UInt32Array::from_iter_values(range))], - ) - .unwrap() - } - - #[test] - fn test_gc_string_view_batch_small_no_compact() { - // view with only short strings (no buffers) --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("a"), Some("b"), Some("c")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 0); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_no_compact() { - // view with large strings (has buffers) but full --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("This string is longer than 12 bytes")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_slice_compact() { - // view with large strings (has buffers) and only partially used --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("this string is longer than 12 bytes")], - } - .build(); - - // slice only 11 rows, so most of the buffer is not used - let array = array.slice(11, 22); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer - } - - /// Compares the values of two string view arrays - fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { - assert_eq!(arr1.len(), arr2.len()); - for (s1, s2) in arr1.iter().zip(arr2.iter()) { - assert_eq!(s1, s2); - } - } - - /// runs garbage collection on string view array - /// and ensures the number of rows are the same - fn do_gc(array: StringViewArray) -> StringViewArray { - let batch = - RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); - let gc_batch = gc_string_view_batch(&batch); - assert_eq!(batch.num_rows(), gc_batch.num_rows()); - assert_eq!(batch.schema(), gc_batch.schema()); - gc_batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .clone() - } - - /// Describes parameters for creating a `StringViewArray` - struct StringViewTest { - /// The number of rows in the array - rows: usize, - /// The strings to use in the array (repeated over and over - strings: 
Vec<Option<&'static str>>,
-    }
-
-    impl StringViewTest {
-        /// Create a `StringViewArray` with the parameters specified in this struct
-        fn build(self) -> StringViewArray {
-            let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192);
-            loop {
-                for &v in self.strings.iter() {
-                    builder.append_option(v);
-                    if builder.len() >= self.rows {
-                        return builder.finish();
-                    }
-                }
-            }
-        }
-    }
-    fn batch_to_pretty_strings(batch: &RecordBatch) -> String {
-        arrow::util::pretty::pretty_format_batches(&[batch.clone()])
-            .unwrap()
-            .to_string()
-    }
-}
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index 59c5da6b6fb20..fb86a008e2cd6 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -85,5 +85,6 @@ pub mod udaf {
     pub use datafusion_physical_expr_functions_aggregate::aggregate::AggregateFunctionExpr;
 }
 
+pub mod coalesce;
 #[cfg(test)]
 pub mod test;
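As a closing illustration of the compaction heuristic in `gc_string_view_batch` above, the following standalone sketch reuses the same values as the new tests (1000 rows of a 35-byte string, sliced down to 22 rows) to show when `actual > 2 * ideal` would trigger a rewrite. The exact byte counts depend on arrow-rs buffer internals, so the sketch prints the decision rather than asserting specific sizes.

use arrow_array::{Array, StringViewArray};

fn main() {
    // 1000 rows of a 35-byte string: every view points into shared data buffers.
    let array = StringViewArray::from_iter_values(
        std::iter::repeat("this string is longer than 12 bytes").take(1000),
    );
    // Keep only 22 rows; the underlying buffers still hold all 1000 strings.
    let sliced = array.slice(11, 22);

    // Same arithmetic as the heuristic: strings longer than 12 bytes count toward
    // the ideal buffer size, shorter ones are inlined in the view itself.
    let ideal: usize = sliced
        .views()
        .iter()
        .map(|v| {
            let len = (*v as u32) as usize;
            if len > 12 {
                len
            } else {
                0
            }
        })
        .sum();
    let actual = sliced.get_buffer_memory_size();

    // With only 22 of 1000 rows referenced, `actual` far exceeds 2x `ideal`,
    // so this batch would be compacted into a single new buffer.
    println!("ideal={ideal}, actual={actual}, compact={}", actual > ideal * 2);
}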