From bdb3d35450ac31b52ce9aefbf1099238a5c1a33c Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 13 Sep 2024 00:22:21 +0800 Subject: [PATCH] Handle case-sensitive identifier when decorrelating predicate subquery (#12443) * fix replace_qualified_name to handle case-sensitive ident * add tests * fix clippy --- .../src/decorrelate_predicate_subquery.rs | 35 +++++++++++++++++-- datafusion/optimizer/src/utils.rs | 4 +-- .../optimizer/tests/optimizer_integration.rs | 25 +++++++++++++ datafusion/sqllogictest/test_files/join.slt | 31 ++++++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 9b1ada382b41..5bb57bf05f62 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -370,8 +370,8 @@ mod tests { use super::*; use crate::test::*; - use arrow::datatypes::DataType; - use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col}; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col, table_scan}; fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { assert_optimized_plan_eq_display_indent( @@ -1909,4 +1909,35 @@ mod tests { assert_optimized_plan_equal(plan, expected) } + + #[test] + fn upper_case_ident() -> Result<()> { + let fields = vec![ + Field::new("A", DataType::UInt32, false), + Field::new("B", DataType::UInt32, false), + ]; + + let schema = Schema::new(fields); + let table_scan_a = table_scan(Some("\"TEST_A\""), &schema, None)?.build()?; + let table_scan_b = table_scan(Some("\"TEST_B\""), &schema, None)?.build()?; + + let subquery = LogicalPlanBuilder::from(table_scan_b) + .filter(col("\"A\"").eq(out_ref_col(DataType::UInt32, "\"TEST_A\".\"A\"")))? + .project(vec![lit(1)])? + .build()?; + + let plan = LogicalPlanBuilder::from(table_scan_a) + .filter(exists(Arc::new(subquery)))? + .project(vec![col("\"TEST_A\".\"B\"")])? + .build()?; + + let expected = "Projection: TEST_A.B [B:UInt32]\ + \n LeftSemi Join: Filter: __correlated_sq_1.A = TEST_A.A [A:UInt32, B:UInt32]\ + \n TableScan: TEST_A [A:UInt32, B:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [Int32(1):Int32, A:UInt32]\ + \n Projection: Int32(1), TEST_B.A [Int32(1):Int32, A:UInt32]\ + \n TableScan: TEST_B [A:UInt32, B:UInt32]"; + + assert_optimized_plan_equal(plan, expected) + } } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index b4d292f88313..6972c16c0ddf 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -104,9 +104,7 @@ pub(crate) fn replace_qualified_name( ) -> Result { let alias_cols: Vec = cols .iter() - .map(|col| { - Column::from_qualified_name(format!("{}.{}", subquery_alias, col.name)) - }) + .map(|col| Column::new(Some(subquery_alias), &col.name)) .collect(); let replace_map: HashMap<&Column, &Column> = cols.iter().zip(alias_cols.iter()).collect(); diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index da5e92eafd11..470bd947c7fb 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -361,6 +361,31 @@ fn select_wildcard_with_repeated_column_but_is_aliased() { assert_eq!(expected, format!("{plan}")); } +#[test] +fn select_correlated_predicate_subquery_with_uppercase_ident() { + let sql = r#" + SELECT * + FROM + test + WHERE + EXISTS ( + SELECT 1 + FROM (SELECT col_int32 as "COL_INT32", col_uint32 as "COL_UINT32" FROM test) "T1" + WHERE "T1"."COL_INT32" = test.col_int32 + ) + "#; + let plan = test_sql(sql).unwrap(); + let expected = "LeftSemi Join: test.col_int32 = __correlated_sq_1.COL_INT32\ + \n Filter: test.col_int32 IS NOT NULL\ + \n TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc]\ + \n SubqueryAlias: __correlated_sq_1\ + \n SubqueryAlias: T1\ + \n Projection: test.col_int32 AS COL_INT32\ + \n Filter: test.col_int32 IS NOT NULL\ + \n TableScan: test projection=[col_int32]"; + assert_eq!(expected, format!("{plan}")); +} + fn test_sql(sql: &str) -> Result { // parse the SQL let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 21fea4ad1025..3e7a08981eac 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1178,3 +1178,34 @@ drop table t1; statement ok drop table t0; + +# Test decorrelate query with the uppercase table name and column name +statement ok +create table "T1"("C1" int, "C2" int); + +statement ok +create table "T2"("C1" int, "C3" int); + +statement ok +select "C1" from "T1" where not exists (select 1 from "T2" where "T1"."C1" = "T2"."C1") + +statement ok +create table t1(c1 int, c2 int); + +statement ok +create table t2(c1 int, c3 int); + +statement ok +select "C1" from (select c1 as "C1", c2 as "C2" from t1) as "T1" where not exists (select 1 from (select c1 as "C1", c3 as "C3" from t2) as "T2" where "T1"."C1" = "T2"."C1") + +statement ok +drop table "T1"; + +statement ok +drop table "T2"; + +statement ok +drop table t1; + +statement ok +drop table t2;