Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support catalog.schema.table.column in SQL SELECT and WHERE #5343

Merged
merged 22 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
324e304
Support catalog.schema.table.column in SQL SELECT and WHERE
Jefffrey Feb 20, 2023
5b358a9
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Feb 28, 2023
7b7782f
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Feb 28, 2023
627c7c2
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 1, 2023
eb65472
Update column new() docstring
Jefffrey Mar 1, 2023
1007dd6
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 3, 2023
1d613ed
Add tests for dfschema search
Jefffrey Mar 3, 2023
91a88e7
Introduce DFField::new_unqualified
Jefffrey Mar 3, 2023
587b0b2
Introduce new_unqualified methods for simpler syntax
Jefffrey Mar 3, 2023
7ee7929
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 3, 2023
04a34ba
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 3, 2023
f730de7
Fix merge
Jefffrey Mar 3, 2023
f58e596
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 3, 2023
03f65e7
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 8, 2023
db57180
New ident() expr function
Jefffrey Mar 8, 2023
5240976
Refactor OwnedTableReference to be a type alias of TableReference<'st…
Jefffrey Mar 8, 2023
cd1453e
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 9, 2023
44bb3fa
Update comments
Jefffrey Mar 9, 2023
b9a9af5
Comments
Jefffrey Mar 9, 2023
57ce66c
Update docstrings
Jefffrey Mar 9, 2023
0a2062d
From OwnedTableReference to TableReference impl
Jefffrey Mar 9, 2023
5bd9a90
Merge branch 'main' into support_catalog_schema_in_ident
Jefffrey Mar 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 75 additions & 34 deletions datafusion/common/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

//! Column

use crate::{DFSchema, DataFusionError, Result, SchemaError};
use crate::utils::{parse_identifiers_normalized, quote_identifier};
use crate::{DFSchema, DataFusionError, OwnedTableReference, Result, SchemaError};
use std::collections::HashSet;
use std::convert::Infallible;
use std::fmt;
Expand All @@ -27,21 +28,37 @@ use std::sync::Arc;
/// A named reference to a qualified field in a schema.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Column {
/// relation/table name.
pub relation: Option<String>,
/// relation/table reference.
pub relation: Option<OwnedTableReference>,
/// field/column name.
pub name: String,
}

impl Column {
/// Create Column from optional qualifier and name
pub fn new(relation: Option<impl Into<String>>, name: impl Into<String>) -> Self {
/// Create Column from optional qualifier and name. The optional qualifier, if present,
/// will be parsed and normalized by default.
///
/// See full details on [`TableReference::parse_str`]
///
/// [`TableReference::parse_str`]: crate::TableReference::parse_str
pub fn new(
relation: Option<impl Into<OwnedTableReference>>,
alamb marked this conversation as resolved.
Show resolved Hide resolved
name: impl Into<String>,
) -> Self {
Self {
relation: relation.map(|r| r.into()),
name: name.into(),
}
}

/// Create an unqualified Column from just a field name.
///
/// Convenience method for when there is no qualifier: `relation` is set to
/// `None` and `name` is converted into an owned `String`.
pub fn new_unqualified(name: impl Into<String>) -> Self {
Self {
// No table reference: the column is identified by its name alone.
relation: None,
name: name.into(),
}
}

/// Create Column from unqualified name.
pub fn from_name(name: impl Into<String>) -> Self {
Self {
Expand All @@ -53,26 +70,36 @@ impl Column {
/// Deserialize a fully qualified name string into a column
pub fn from_qualified_name(flat_name: impl Into<String>) -> Self {
let flat_name = flat_name.into();
use sqlparser::tokenizer::Token;

let dialect = sqlparser::dialect::GenericDialect {};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one of the key changes in my mind: use the full standard identifier normalization rules rather than custom sqlparser-based semantics.

let mut tokenizer = sqlparser::tokenizer::Tokenizer::new(&dialect, &flat_name);
if let Ok(tokens) = tokenizer.tokenize() {
if let [Token::Word(relation), Token::Period, Token::Word(name)] =
tokens.as_slice()
{
return Column {
relation: Some(relation.value.clone()),
name: name.value.clone(),
};
}
}
// any expression that's not in the form of `foo.bar` will be treated as unqualified column
// name
Column {
relation: None,
name: flat_name,
}
let mut idents = parse_identifiers_normalized(&flat_name);

let (relation, name) = match idents.len() {
1 => (None, idents.remove(0)),
2 => (
Some(OwnedTableReference::Bare {
table: idents.remove(0).into(),
}),
idents.remove(0),
),
3 => (
Some(OwnedTableReference::Partial {
schema: idents.remove(0).into(),
table: idents.remove(0).into(),
}),
idents.remove(0),
),
4 => (
Some(OwnedTableReference::Full {
catalog: idents.remove(0).into(),
schema: idents.remove(0).into(),
table: idents.remove(0).into(),
}),
idents.remove(0),
),
// any expression that failed to parse or has more than 4 period delimited
// identifiers will be treated as an unqualified column name
_ => (None, flat_name),
};
Self { relation, name }
}

/// Serialize column into a flat name string
Expand All @@ -83,6 +110,18 @@ impl Column {
}
}

/// Serialize column into a quoted flat name string
///
/// When the column has a relation, the result is `<relation>.<name>`, where the
/// relation is rendered via its `to_quoted_string()` and the name is passed
/// through `quote_identifier`. Without a relation, only the quoted name is
/// returned.
pub fn quoted_flat_name(&self) -> String {
// TODO: quote identifiers only when special characters present
// see: https://github.com/apache/arrow-datafusion/issues/5523
match &self.relation {
// Qualified column: join the quoted relation and quoted name with a period.
Some(r) => {
format!("{}.{}", r.to_quoted_string(), quote_identifier(&self.name))
}
// Unqualified column: the quoted name on its own.
None => quote_identifier(&self.name),
}
}

/// Qualify column if not done yet.
///
/// If this column already has a [relation](Self::relation), it will be returned as is and the given parameters are
Expand Down Expand Up @@ -151,7 +190,7 @@ impl Column {
}

Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
field: Column::new(self.relation.clone(), self.name),
field: Box::new(Column::new(self.relation.clone(), self.name)),
valid_fields: schemas
.iter()
.flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
Expand Down Expand Up @@ -240,16 +279,15 @@ impl Column {
// If not due to USING columns then due to ambiguous column name
return Err(DataFusionError::SchemaError(
SchemaError::AmbiguousReference {
qualifier: None,
name: self.name,
field: Column::new_unqualified(self.name),
},
));
}
}
}

Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
field: self,
field: Box::new(self),
valid_fields: schemas
.iter()
.flat_map(|s| s.iter())
Expand Down Expand Up @@ -304,7 +342,12 @@ mod tests {
let fields = names
.iter()
.map(|(qualifier, name)| {
DFField::new(qualifier.to_owned(), name, DataType::Boolean, true)
DFField::new(
qualifier.to_owned().map(|s| s.to_string()),
name,
DataType::Boolean,
true,
)
})
.collect::<Vec<_>>();
DFSchema::new_with_metadata(fields, HashMap::new())
Expand Down Expand Up @@ -362,9 +405,7 @@ mod tests {
&[],
)
.expect_err("should've failed to find field");
let expected = "Schema error: No field named 'z'. \
Valid fields are 't1'.'a', 't1'.'b', 't2'.'c', \
't2'.'d', 't3'.'a', 't3'.'b', 't3'.'c', 't3'.'d', 't3'.'e'.";
let expected = r#"Schema error: No field named "z". Valid fields are "t1"."a", "t1"."b", "t2"."c", "t2"."d", "t3"."a", "t3"."b", "t3"."c", "t3"."d", "t3"."e"."#;
assert_eq!(err.to_string(), expected);

// ambiguous column reference
Expand All @@ -375,7 +416,7 @@ mod tests {
&[],
)
.expect_err("should've found ambiguous field");
let expected = "Schema error: Ambiguous reference to unqualified field 'a'";
let expected = "Schema error: Ambiguous reference to unqualified field \"a\"";
assert_eq!(err.to_string(), expected);

Ok(())
Expand Down
Loading