Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved ergonomy for CREATE EXTERNAL TABLE OPTIONS: Don't require quotations for simple namespaced keys like foo.bar #10483

Merged
merged 2 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 26 additions & 39 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ macro_rules! config_namespace {
$(
stringify!($field_name) => self.$field_name.set(rem, value),
)*
_ => return Err(DataFusionError::Configuration(format!(
_ => return _config_err!(
"Config value \"{}\" not found on {}", key, stringify!($struct_name)
)))
)
}
}

Expand Down Expand Up @@ -675,22 +675,17 @@ impl ConfigOptions {

/// Set a configuration option
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (prefix, key) = key.split_once('.').ok_or_else(|| {
DataFusionError::Configuration(format!(
"could not find config namespace for key \"{key}\"",
))
})?;
let Some((prefix, key)) = key.split_once('.') else {
return _config_err!("could not find config namespace for key \"{key}\"");
};

if prefix == "datafusion" {
return ConfigField::set(self, key, value);
}

let e = self.extensions.0.get_mut(prefix);
let e = e.ok_or_else(|| {
DataFusionError::Configuration(format!(
"Could not find config namespace \"{prefix}\""
))
})?;
let Some(e) = self.extensions.0.get_mut(prefix) else {
return _config_err!("Could not find config namespace \"{prefix}\"");
};
e.0.set(key, value)
}

Expand Down Expand Up @@ -1278,22 +1273,17 @@ impl TableOptions {
///
/// A result indicating success or failure in setting the configuration option.
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (prefix, _) = key.split_once('.').ok_or_else(|| {
DataFusionError::Configuration(format!(
"could not find config namespace for key \"{key}\""
))
})?;
let Some((prefix, _)) = key.split_once('.') else {
return _config_err!("could not find config namespace for key \"{key}\"");
};

if prefix == "format" {
return ConfigField::set(self, key, value);
}

let e = self.extensions.0.get_mut(prefix);
let e = e.ok_or_else(|| {
DataFusionError::Configuration(format!(
"Could not find config namespace \"{prefix}\""
))
})?;
let Some(e) = self.extensions.0.get_mut(prefix) else {
return _config_err!("Could not find config namespace \"{prefix}\"");
};
e.0.set(key, value)
}

Expand Down Expand Up @@ -1412,19 +1402,19 @@ impl ConfigField for TableParquetOptions {
fn set(&mut self, key: &str, value: &str) -> Result<()> {
// Determine if the key is a global, metadata, or column-specific setting
if key.starts_with("metadata::") {
let k =
match key.split("::").collect::<Vec<_>>()[..] {
[_meta] | [_meta, ""] => return Err(DataFusionError::Configuration(
let k = match key.split("::").collect::<Vec<_>>()[..] {
[_meta] | [_meta, ""] => {
return _config_err!(
"Invalid metadata key provided, missing key in metadata::<key>"
.to_string(),
)),
[_meta, k] => k.into(),
_ => {
return Err(DataFusionError::Configuration(format!(
)
}
[_meta, k] => k.into(),
_ => {
return _config_err!(
"Invalid metadata key provided, found too many '::' in \"{key}\""
)))
}
};
)
}
};
self.key_value_metadata.insert(k, Some(value.into()));
Ok(())
} else if key.contains("::") {
Expand Down Expand Up @@ -1497,10 +1487,7 @@ macro_rules! config_namespace_with_hashmap {

inner_value.set(inner_key, value)
}
_ => Err(DataFusionError::Configuration(format!(
"Unrecognized key '{}'.",
key
))),
_ => _config_err!("Unrecognized key '{key}'."),
}
}

Expand Down
24 changes: 11 additions & 13 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ use std::ops::ControlFlow;
use std::sync::{Arc, Weak};

use super::options::ReadOptions;
#[cfg(feature = "array_expressions")]
use crate::functions_array;
use crate::{
catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA},
catalog::listing_schema::ListingSchemaProvider,
Expand Down Expand Up @@ -53,53 +55,49 @@ use crate::{
},
optimizer::analyzer::{Analyzer, AnalyzerRule},
optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
physical_expr::{create_physical_expr, PhysicalExpr},
physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
physical_plan::ExecutionPlan,
physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
variable::{VarProvider, VarType},
};

#[cfg(feature = "array_expressions")]
use crate::functions_array;
use crate::{functions, functions_aggregate};

use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use datafusion_common::tree_node::TreeNode;
use datafusion_common::{
alias::AliasGenerator,
config::{ConfigExtension, TableOptions},
exec_err, not_impl_err, plan_datafusion_err, plan_err,
tree_node::{TreeNodeRecursion, TreeNodeVisitor},
tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
DFSchema, SchemaReference, TableReference,
};
use datafusion_execution::registry::SerializerRegistry;
use datafusion_expr::{
expr_rewriter::FunctionRewrite,
logical_plan::{DdlStatement, Statement},
simplify::SimplifyInfo,
var_provider::is_system_variables,
Expr, ExprSchemable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
};
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_sql::{
parser::{CopyToSource, CopyToStatement, DFParser},
planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel},
ResolvedTableReference,
};
use parking_lot::RwLock;
use sqlparser::dialect::dialect_from_str;

use async_trait::async_trait;
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use url::Url;
use uuid::Uuid;

use crate::physical_expr::PhysicalExpr;
pub use datafusion_execution::config::SessionConfig;
pub use datafusion_execution::TaskContext;
pub use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::simplify::SimplifyInfo;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_physical_expr::create_physical_expr;

mod avro;
mod csv;
Expand Down
18 changes: 10 additions & 8 deletions datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::sync::Arc;
use std::vec;

use arrow::array::{ArrayRef, FixedSizeListArray};
use arrow::datatypes::{
DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType,
Expand All @@ -24,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory;
use datafusion::datasource::TableProvider;
use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
use datafusion::execution::FunctionRegistry;
use datafusion::functions_aggregate::covariance::{covar_pop, covar_samp};
use datafusion::functions_aggregate::expr_fn::first_value;
use datafusion::prelude::*;
Expand Down Expand Up @@ -51,16 +58,11 @@ use datafusion_proto::bytes::{
logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec,
};
use datafusion_proto::logical_plan::to_proto::serialize_expr;
use datafusion_proto::logical_plan::LogicalExtensionCodec;
use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec};
use datafusion_proto::logical_plan::{
from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec,
};
use datafusion_proto::protobuf;
use std::any::Any;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::sync::Arc;
use std::vec;

use datafusion::execution::FunctionRegistry;
use prost::Message;

#[cfg(feature = "json")]
Expand Down
24 changes: 19 additions & 5 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,21 @@ impl<'a> DFParser<'a> {
pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
let next_token = self.parser.next_token();
match next_token.token {
Token::Word(Word { value, .. }) => Ok(value),
Token::Word(Word { value, .. }) => {
let mut parts = vec![value];
while self.parser.consume_token(&Token::Period) {
let next_token = self.parser.next_token();
if let Token::Word(Word { value, .. }) = next_token.token {
parts.push(value);
} else {
// Unquoted namespaced keys have to conform to the syntax
// "<WORD>[\.<WORD>]*". If we have a key that breaks this
// pattern, error out:
return self.parser.expected("key name", next_token);
ozankabak marked this conversation as resolved.
Show resolved Hide resolved
}
}
Ok(parts.join("."))
}
Token::SingleQuotedString(s) => Ok(s),
Token::DoubleQuotedString(s) => Ok(s),
Token::EscapedStringLiteral(s) => Ok(s),
Expand Down Expand Up @@ -712,15 +726,15 @@ impl<'a> DFParser<'a> {
} else {
self.parser.expect_keyword(Keyword::HEADER)?;
self.parser.expect_keyword(Keyword::ROW)?;
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)");
}
}
Keyword::DELIMITER => {
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')");
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')");
}
Keyword::COMPRESSION => {
self.parser.expect_keyword(Keyword::TYPE)?;
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)");
}
Keyword::PARTITIONED => {
self.parser.expect_keyword(Keyword::BY)?;
Expand Down Expand Up @@ -933,7 +947,7 @@ mod tests {
expect_parse_ok(sql, expected)?;

// positive case with delimiter
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.delimiter' '|')";
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
Expand Down
21 changes: 17 additions & 4 deletions datafusion/sqllogictest/test_files/create_external_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ LOCATION 'test_files/scratch/create_external_table/manual_partitioning/';
statement error DataFusion error: Error during planning: Option format.delimiter is specified multiple times
CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS (
'format.delimiter' '*',
'format.has_header' 'true',
'format.delimiter' '|')
'format.has_header' 'true',
'format.delimiter' '|')
LOCATION 'foo.csv';

# If a config does not belong to any namespace, we assume it is a 'format' option and apply the 'format' prefix for backwards compatibility.
Expand All @@ -201,7 +201,20 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
'format.delimiter' '|',
'has_header' 'false');
'has_header' 'false');

# Verify that we do not need quotations for simple namespaced keys.
statement ok
CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_regionkey BIGINT,
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
format.delimiter '|',
has_header false,
compression gzip);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');