From 0a2f09ffc01d10083fd27f2c33e33730410fdcef Mon Sep 17 00:00:00 2001 From: Sergey Zhukov <62326549+cj-zhukov@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:33:24 +0300 Subject: [PATCH] Add a hint about normalization in error message (#14089) (#14113) * Add a hint about normalization in error message (#14089) * normalization suggestion is only shown when a column name matches schema --------- Co-authored-by: Sergey Zhukov --- datafusion/common/src/column.rs | 3 ++- datafusion/common/src/dfschema.rs | 23 +++++++++------- datafusion/common/src/error.rs | 12 +++++++++ datafusion/expr/src/expr_rewriter/mod.rs | 7 +++-- datafusion/sql/tests/sql_integration.rs | 27 +++++++++---------- .../sqllogictest/test_files/identifiers.slt | 8 +++--- datafusion/sqllogictest/test_files/select.slt | 4 +-- 7 files changed, 48 insertions(+), 36 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 4f25260d5e9c..fdde3d69eddb 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -368,7 +368,8 @@ mod tests { &[], ) .expect_err("should've failed to find field"); - let expected = r#"Schema error: No field named z. Valid fields are t1.a, t1.b, t2.c, t2.d, t3.a, t3.b, t3.c, t3.d, t3.e."#; + let expected = "Schema error: No field named z. \ + Valid fields are t1.a, t1.b, t2.c, t2.d, t3.a, t3.b, t3.c, t3.d, t3.e."; assert_eq!(err.strip_backtrace(), expected); // ambiguous column reference diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index ac4d8be8045f..302d515e027e 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1068,10 +1068,12 @@ mod tests { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; // lookup with unqualified name "t1.c0" let err = schema.index_of_column(&col).unwrap_err(); - assert_eq!( - err.strip_backtrace(), - "Schema error: No field named \"t1.c0\". Valid fields are t1.c0, t1.c1." - ); + let expected = "Schema error: No field named \"t1.c0\". \ + Column names are case sensitive. \ + You can use double quotes to refer to the \"\"t1.c0\"\" column \ + or set the datafusion.sql_parser.enable_ident_normalization configuration. \ + Valid fields are t1.c0, t1.c1."; + assert_eq!(err.strip_backtrace(), expected); Ok(()) } @@ -1088,10 +1090,9 @@ mod tests { // lookup with unqualified name "t1.c0" let err = schema.index_of_column(&col).unwrap_err(); - assert_eq!( - err.strip_backtrace(), - "Schema error: No field named \"t1.c0\". Valid fields are t1.\"CapitalColumn\", t1.\"field.with.period\"." - ); + let expected = "Schema error: No field named \"t1.c0\". \ + Valid fields are t1.\"CapitalColumn\", t1.\"field.with.period\"."; + assert_eq!(err.strip_backtrace(), expected); Ok(()) } @@ -1261,12 +1262,14 @@ mod tests { let col = Column::from_qualified_name("t1.c0"); let err = schema.index_of_column(&col).unwrap_err(); - assert_eq!(err.strip_backtrace(), "Schema error: No field named t1.c0."); + let expected = "Schema error: No field named t1.c0."; + assert_eq!(err.strip_backtrace(), expected); // the same check without qualifier let col = Column::from_name("c0"); let err = schema.index_of_column(&col).err().unwrap(); - assert_eq!(err.strip_backtrace(), "Schema error: No field named c0."); + let expected = "Schema error: No field named c0."; + assert_eq!(err.strip_backtrace(), expected); } #[test] diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 1012c4cd2270..f7c247aaf288 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -167,6 +167,18 @@ impl Display for SchemaError { valid_fields, } => { write!(f, "No field named {}", field.quoted_flat_name())?; + let lower_valid_fields = valid_fields + .iter() + .map(|column| column.flat_name().to_lowercase()) + .collect::>(); + if lower_valid_fields.contains(&field.flat_name().to_lowercase()) { + write!( + f, + ". Column names are case sensitive. You can use double quotes to refer to the \"{}\" column \ + or set the datafusion.sql_parser.enable_ident_normalization configuration", + field.quoted_flat_name() + )?; + } if !valid_fields.is_empty() { write!( f, diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index b944428977c4..335d5587bee7 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -462,10 +462,9 @@ mod test { normalize_col_with_schemas_and_ambiguity_check(expr, &[&schemas], &[]) .unwrap_err() .strip_backtrace(); - assert_eq!( - error, - r#"Schema error: No field named b. Valid fields are "tableA".a."# - ); + let expected = "Schema error: No field named b. \ + Valid fields are \"tableA\".a."; + assert_eq!(error, expected); } #[test] diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index b93060988d20..5edafdd4053c 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -492,7 +492,8 @@ Dml: op=[Insert Into] table=[test_decimal] )] #[case::non_existing_column( "INSERT INTO test_decimal (nonexistent, price) VALUES (1, 2), (4, 5)", - "Schema error: No field named nonexistent. Valid fields are id, price." + "Schema error: No field named nonexistent. \ + Valid fields are id, price." )] #[case::target_column_count_mismatch( "INSERT INTO person (id, first_name, last_name) VALUES ($1, $2)", @@ -1358,9 +1359,11 @@ fn select_simple_aggregate_with_groupby_column_unselected() { fn select_simple_aggregate_with_groupby_and_column_in_group_by_does_not_exist() { let sql = "SELECT sum(age) FROM person GROUP BY doesnotexist"; let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!("Schema error: No field named doesnotexist. Valid fields are \"sum(person.age)\", \ + let expected = "Schema error: No field named doesnotexist. \ + Valid fields are \"sum(person.age)\", \ person.id, person.first_name, person.last_name, person.age, person.state, \ - person.salary, person.birth_date, person.\"😀\".", err.strip_backtrace()); + person.salary, person.birth_date, person.\"😀\"."; + assert_eq!(err.strip_backtrace(), expected); } #[test] @@ -3636,10 +3639,8 @@ fn test_prepare_statement_to_plan_panic_prepare_wrong_syntax() { #[test] fn test_prepare_statement_to_plan_panic_no_relation_and_constant_param() { let sql = "PREPARE my_plan(INT) AS SELECT id + $1"; - assert_eq!( - logical_plan(sql).unwrap_err().strip_backtrace(), - "Schema error: No field named id." - ) + let expected = "Schema error: No field named id."; + assert_eq!(logical_plan(sql).unwrap_err().strip_backtrace(), expected); } #[test] @@ -4334,18 +4335,14 @@ fn test_multi_grouping_sets() { fn test_field_not_found_window_function() { let order_by_sql = "SELECT count() OVER (order by a);"; let order_by_err = logical_plan(order_by_sql).expect_err("query should have failed"); - assert_eq!( - "Schema error: No field named a.", - order_by_err.strip_backtrace() - ); + let expected = "Schema error: No field named a."; + assert_eq!(order_by_err.strip_backtrace(), expected); let partition_by_sql = "SELECT count() OVER (PARTITION BY a);"; let partition_by_err = logical_plan(partition_by_sql).expect_err("query should have failed"); - assert_eq!( - "Schema error: No field named a.", - partition_by_err.strip_backtrace() - ); + let expected = "Schema error: No field named a."; + assert_eq!(partition_by_err.strip_backtrace(), expected); let qualified_sql = "SELECT order_id, MAX(qty) OVER (PARTITION BY orders.order_id) from orders"; diff --git a/datafusion/sqllogictest/test_files/identifiers.slt b/datafusion/sqllogictest/test_files/identifiers.slt index 755d617e7a2a..f1b2777bece0 100644 --- a/datafusion/sqllogictest/test_files/identifiers.slt +++ b/datafusion/sqllogictest/test_files/identifiers.slt @@ -90,16 +90,16 @@ drop table case_insensitive_test statement ok CREATE TABLE test("Column1" string) AS VALUES ('content1'); -statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. SELECT COLumn1 from test -statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. SELECT Column1 from test -statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. SELECT column1 from test -statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. SELECT "column1" from test statement ok diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index a127463c2b27..dce685f5c137 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1799,7 +1799,7 @@ select a + b from (select 1 as a, 2 as b, 1 as "a + b"); 3 # Can't reference an output column by expression over projection. -query error DataFusion error: Schema error: No field named a\. Valid fields are "a \+ Int64\(1\)"\. +query error DataFusion error: Schema error: No field named a. Valid fields are "a \+ Int64\(1\)"\. select a + 1 from (select a+1 from (select 1 as a)); query I @@ -1834,5 +1834,5 @@ statement ok DROP TABLE test; # Can't reference an unqualified column by a qualified name -query error DataFusion error: Schema error: No field named t1\.v1\. Valid fields are "t1\.v1"\. +query error DataFusion error: Schema error: No field named t1.v1. Column names are case sensitive. You can use double quotes to refer to the "t1.v1" column or set the datafusion.sql_parser.enable_ident_normalization configuration. Valid fields are "t1.v1". SELECT t1.v1 FROM (SELECT 1 AS "t1.v1");