Skip to content

Commit

Permalink
Improved ergonomy for CREATE EXTERNAL TABLE OPTIONS: Don't require …
Browse files Browse the repository at this point in the history
…quotations for simple namespaced keys like `foo.bar` (apache#10483)

* Don't require quotations for simple namespaced keys like foo.bar

* Add comments clarifying parse error cases for unquoted namespaced keys
  • Loading branch information
ozankabak authored May 13, 2024
1 parent adf0bfc commit 5b74c2d
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 70 deletions.
65 changes: 26 additions & 39 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ macro_rules! config_namespace {
$(
stringify!($field_name) => self.$field_name.set(rem, value),
)*
_ => return Err(DataFusionError::Configuration(format!(
_ => return _config_err!(
"Config value \"{}\" not found on {}", key, stringify!($struct_name)
)))
)
}
}

Expand Down Expand Up @@ -676,22 +676,17 @@ impl ConfigOptions {

/// Set a configuration option
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (prefix, key) = key.split_once('.').ok_or_else(|| {
DataFusionError::Configuration(format!(
"could not find config namespace for key \"{key}\"",
))
})?;
let Some((prefix, key)) = key.split_once('.') else {
return _config_err!("could not find config namespace for key \"{key}\"");
};

if prefix == "datafusion" {
return ConfigField::set(self, key, value);
}

let e = self.extensions.0.get_mut(prefix);
let e = e.ok_or_else(|| {
DataFusionError::Configuration(format!(
"Could not find config namespace \"{prefix}\""
))
})?;
let Some(e) = self.extensions.0.get_mut(prefix) else {
return _config_err!("Could not find config namespace \"{prefix}\"");
};
e.0.set(key, value)
}

Expand Down Expand Up @@ -1279,22 +1274,17 @@ impl TableOptions {
///
/// A result indicating success or failure in setting the configuration option.
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (prefix, _) = key.split_once('.').ok_or_else(|| {
DataFusionError::Configuration(format!(
"could not find config namespace for key \"{key}\""
))
})?;
let Some((prefix, _)) = key.split_once('.') else {
return _config_err!("could not find config namespace for key \"{key}\"");
};

if prefix == "format" {
return ConfigField::set(self, key, value);
}

let e = self.extensions.0.get_mut(prefix);
let e = e.ok_or_else(|| {
DataFusionError::Configuration(format!(
"Could not find config namespace \"{prefix}\""
))
})?;
let Some(e) = self.extensions.0.get_mut(prefix) else {
return _config_err!("Could not find config namespace \"{prefix}\"");
};
e.0.set(key, value)
}

Expand Down Expand Up @@ -1413,19 +1403,19 @@ impl ConfigField for TableParquetOptions {
fn set(&mut self, key: &str, value: &str) -> Result<()> {
// Determine if the key is a global, metadata, or column-specific setting
if key.starts_with("metadata::") {
let k =
match key.split("::").collect::<Vec<_>>()[..] {
[_meta] | [_meta, ""] => return Err(DataFusionError::Configuration(
let k = match key.split("::").collect::<Vec<_>>()[..] {
[_meta] | [_meta, ""] => {
return _config_err!(
"Invalid metadata key provided, missing key in metadata::<key>"
.to_string(),
)),
[_meta, k] => k.into(),
_ => {
return Err(DataFusionError::Configuration(format!(
)
}
[_meta, k] => k.into(),
_ => {
return _config_err!(
"Invalid metadata key provided, found too many '::' in \"{key}\""
)))
}
};
)
}
};
self.key_value_metadata.insert(k, Some(value.into()));
Ok(())
} else if key.contains("::") {
Expand Down Expand Up @@ -1498,10 +1488,7 @@ macro_rules! config_namespace_with_hashmap {

inner_value.set(inner_key, value)
}
_ => Err(DataFusionError::Configuration(format!(
"Unrecognized key '{}'.",
key
))),
_ => _config_err!("Unrecognized key '{key}'."),
}
}

Expand Down
24 changes: 11 additions & 13 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ use std::ops::ControlFlow;
use std::sync::{Arc, Weak};

use super::options::ReadOptions;
#[cfg(feature = "array_expressions")]
use crate::functions_array;
use crate::{
catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA},
catalog::listing_schema::ListingSchemaProvider,
Expand Down Expand Up @@ -53,53 +55,49 @@ use crate::{
},
optimizer::analyzer::{Analyzer, AnalyzerRule},
optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
physical_expr::{create_physical_expr, PhysicalExpr},
physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
physical_plan::ExecutionPlan,
physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
variable::{VarProvider, VarType},
};

#[cfg(feature = "array_expressions")]
use crate::functions_array;
use crate::{functions, functions_aggregate};

use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use datafusion_common::tree_node::TreeNode;
use datafusion_common::{
alias::AliasGenerator,
config::{ConfigExtension, TableOptions},
exec_err, not_impl_err, plan_datafusion_err, plan_err,
tree_node::{TreeNodeRecursion, TreeNodeVisitor},
tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
DFSchema, SchemaReference, TableReference,
};
use datafusion_execution::registry::SerializerRegistry;
use datafusion_expr::{
expr_rewriter::FunctionRewrite,
logical_plan::{DdlStatement, Statement},
simplify::SimplifyInfo,
var_provider::is_system_variables,
Expr, ExprSchemable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
};
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_sql::{
parser::{CopyToSource, CopyToStatement, DFParser},
planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel},
ResolvedTableReference,
};
use parking_lot::RwLock;
use sqlparser::dialect::dialect_from_str;

use async_trait::async_trait;
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use url::Url;
use uuid::Uuid;

use crate::physical_expr::PhysicalExpr;
pub use datafusion_execution::config::SessionConfig;
pub use datafusion_execution::TaskContext;
pub use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::simplify::SimplifyInfo;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_physical_expr::create_physical_expr;

mod avro;
mod csv;
Expand Down
18 changes: 10 additions & 8 deletions datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::sync::Arc;
use std::vec;

use arrow::array::{ArrayRef, FixedSizeListArray};
use arrow::datatypes::{
DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType,
Expand All @@ -24,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory;
use datafusion::datasource::TableProvider;
use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
use datafusion::execution::FunctionRegistry;
use datafusion::functions_aggregate::covariance::{covar_pop, covar_samp};
use datafusion::functions_aggregate::expr_fn::first_value;
use datafusion::prelude::*;
Expand Down Expand Up @@ -51,16 +58,11 @@ use datafusion_proto::bytes::{
logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec,
};
use datafusion_proto::logical_plan::to_proto::serialize_expr;
use datafusion_proto::logical_plan::LogicalExtensionCodec;
use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec};
use datafusion_proto::logical_plan::{
from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec,
};
use datafusion_proto::protobuf;
use std::any::Any;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::sync::Arc;
use std::vec;

use datafusion::execution::FunctionRegistry;
use prost::Message;

#[cfg(feature = "json")]
Expand Down
24 changes: 19 additions & 5 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,21 @@ impl<'a> DFParser<'a> {
pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
let next_token = self.parser.next_token();
match next_token.token {
Token::Word(Word { value, .. }) => Ok(value),
Token::Word(Word { value, .. }) => {
let mut parts = vec![value];
while self.parser.consume_token(&Token::Period) {
let next_token = self.parser.next_token();
if let Token::Word(Word { value, .. }) = next_token.token {
parts.push(value);
} else {
// Unquoted namespaced keys have to conform to the syntax
// "<WORD>[\.<WORD>]*". If we have a key that breaks this
// pattern, error out:
return self.parser.expected("key name", next_token);
}
}
Ok(parts.join("."))
}
Token::SingleQuotedString(s) => Ok(s),
Token::DoubleQuotedString(s) => Ok(s),
Token::EscapedStringLiteral(s) => Ok(s),
Expand Down Expand Up @@ -712,15 +726,15 @@ impl<'a> DFParser<'a> {
} else {
self.parser.expect_keyword(Keyword::HEADER)?;
self.parser.expect_keyword(Keyword::ROW)?;
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)");
}
}
Keyword::DELIMITER => {
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')");
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')");
}
Keyword::COMPRESSION => {
self.parser.expect_keyword(Keyword::TYPE)?;
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)");
}
Keyword::PARTITIONED => {
self.parser.expect_keyword(Keyword::BY)?;
Expand Down Expand Up @@ -933,7 +947,7 @@ mod tests {
expect_parse_ok(sql, expected)?;

// positive case with delimiter
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.delimiter' '|')";
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
Expand Down
21 changes: 17 additions & 4 deletions datafusion/sqllogictest/test_files/create_external_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ LOCATION 'test_files/scratch/create_external_table/manual_partitioning/';
statement error DataFusion error: Error during planning: Option format.delimiter is specified multiple times
CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS (
'format.delimiter' '*',
'format.has_header' 'true',
'format.delimiter' '|')
'format.has_header' 'true',
'format.delimiter' '|')
LOCATION 'foo.csv';

# If a config does not belong to any namespace, we assume it is a 'format' option and apply the 'format' prefix for backwards compatibility.
Expand All @@ -201,7 +201,20 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
'format.delimiter' '|',
'has_header' 'false');
'has_header' 'false');

# Verify that we do not need quotations for simple namespaced keys.
statement ok
CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_regionkey BIGINT,
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
format.delimiter '|',
has_header false,
compression gzip);
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');

0 comments on commit 5b74c2d

Please sign in to comment.