From 6e921032b0211deaf498ef776b45ba7dd1592b94 Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Sat, 14 Sep 2024 19:42:41 +0800 Subject: [PATCH] Fix: check ambiguous column reference --- datafusion/common/src/dfschema.rs | 89 ++++++++++++++++++++- datafusion/sql/src/expr/identifier.rs | 3 + datafusion/sqllogictest/test_files/join.slt | 14 ++++ 3 files changed, 102 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 095f4c5101943..7bb7b27dcbf49 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -412,6 +412,33 @@ impl DFSchema { } } + /// Check whether the column reference is ambiguous + pub fn check_ambiguous_name( + &self, + qualifier: Option<&TableReference>, + name: &str, + ) -> Result<()> { + let count = self + .iter() + .filter(|(field_q, f)| match (field_q, qualifier) { + (Some(q1), Some(q2)) => q1.resolved_eq(q2) && f.name() == name, + (None, None) => f.name() == name, + _ => false, + }) + .take(2) + .count(); + if count > 1 { + _schema_err!(SchemaError::AmbiguousReference { + field: Column { + relation: None, + name: name.to_string(), + }, + }) + } else { + Ok(()) + } + } + /// Find the qualified field with the given name pub fn qualified_field_with_name( &self, @@ -419,10 +446,7 @@ impl DFSchema { name: &str, ) -> Result<(Option<&TableReference>, &Field)> { if let Some(qualifier) = qualifier { - let idx = self - .index_of_column_by_name(Some(qualifier), name) - .ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?; - Ok((self.field_qualifiers[idx].as_ref(), self.field(idx))) + self.qualified_field_with_qualified_name(qualifier, name) } else { self.qualified_field_with_unqualified_name(name) } @@ -467,6 +491,34 @@ impl DFSchema { .collect() } + /// Find all fields that match the given name with qualifier and return them with their qualifier + pub fn qualified_fields_with_qualified_name( + &self, + qualifier: &TableReference, + name: &str, + ) -> Vec<(Option<&TableReference>, &Field)> { + self.iter() + .filter(|(q, f)| match (qualifier, q) { + // field to lookup is qualified. + // current field is qualified and not shared between relations, compare both + // qualifier and name. + (q, Some(field_q)) => q.resolved_eq(field_q) && f.name() == name, + // field to lookup is qualified but current field is unqualified. + (qq, None) => { + // the original field may now be aliased with a name that matches the + // original qualified name + let column = Column::from_qualified_name(f.name()); + if let Some(r) = column.relation.as_ref() { + r == qq && column.name == name + } else { + false + } + } + }) + .map(|(qualifier, field)| (qualifier, field.as_ref())) + .collect() + } + /// Find all fields that match the given name and convert to column pub fn columns_with_unqualified_name(&self, name: &str) -> Vec { self.iter() @@ -519,6 +571,35 @@ impl DFSchema { } } + /// Find the qualified field with the given qualified name + pub fn qualified_field_with_qualified_name( + &self, + qualifier: &TableReference, + name: &str, + ) -> Result<(Option<&TableReference>, &Field)> { + let matches = self.qualified_fields_with_qualified_name(qualifier, name); + match matches.len() { + 0 => Err(field_not_found(Some(qualifier.clone()), name, self)), + 1 => Ok((matches[0].0, (matches[0].1))), + _ => { + let fields_with_qualifier = matches + .iter() + .filter(|(q, _)| q.is_some()) + .collect::>(); + if fields_with_qualifier.len() == 1 { + Ok((fields_with_qualifier[0].0, fields_with_qualifier[0].1)) + } else { + _schema_err!(SchemaError::AmbiguousReference { + field: Column { + relation: None, + name: name.to_string(), + }, + }) + } + } + } + } + /// Find the field with the given name pub fn field_with_unqualified_name(&self, name: &str) -> Result<&Field> { self.qualified_field_with_unqualified_name(name) diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 049600799f3ce..f7c5c311df946 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -186,6 +186,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let s = &ids[0..ids.len()]; // safe unwrap as s can never be empty or exceed the bounds let (relation, column_name) = form_identifier(s).unwrap(); + // sanity check on column + schema + .check_ambiguous_name(relation.as_ref(), column_name)?; Ok(Expr::Column(Column::new(relation, column_name))) } } diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 3e7a08981eac5..f4bc17a48d83e 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1209,3 +1209,17 @@ drop table t1; statement ok drop table t2; + +# Test SQLancer issue: https://github.com/apache/datafusion/issues/12337 +statement ok +create table t1(v1 int); + +## Query with Ambiguous column reference +query error DataFusion error: Schema error: Ambiguous reference to unqualified field v1 +select count(*) +from t1 +right outer join t1 +on t1.v1 > 0; + +statement ok +drop table t1; \ No newline at end of file