From ebf6c30507495ef495b6fd36aec27de10dc821dd Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Tue, 29 Aug 2023 17:51:15 -0700 Subject: [PATCH 1/2] refactor: change file type logic for create table --- datafusion/sql/src/statement.rs | 5 ++--- datafusion/sql/tests/sql_integration.rs | 5 ++++- docs/source/user-guide/sql/ddl.md | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 655442d7e353..638b25f72e46 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -679,11 +679,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { options, } = statement; - if file_type != "CSV" - && file_type != "JSON" + if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW") && file_compression_type != CompressionTypeVariant::UNCOMPRESSED { - plan_err!("File compression type can be specified for CSV/JSON files.")?; + plan_err!("File compression type cannot be set for PARQUET, AVRO, or ARROW files.")?; } let schema = self.build_schema(columns)?; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 07112184bf59..154bd3f9a01a 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1850,6 +1850,7 @@ fn create_external_table_with_compression_type() { "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv.bz2'", "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE GZIP LOCATION 'foo.json.gz'", "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE BZIP2 LOCATION 'foo.json.bz2'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD COMPRESSION TYPE GZIP LOCATION 'foo.unk'", ]; for sql in sqls { let expected = "CreateExternalTable: Bare { table: \"t\" }"; @@ -1862,11 +1863,13 @@ fn create_external_table_with_compression_type() { "CREATE EXTERNAL TABLE t STORED AS AVRO COMPRESSION TYPE BZIP2 LOCATION 'foo.avro'", "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE GZIP LOCATION 'foo.parquet'", "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE BZIP2 LOCATION 'foo.parquet'", + "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE GZIP LOCATION 'foo.arrow'", + "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE BZIP2 LOCATION 'foo.arrow'", ]; for sql in sqls { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"File compression type can be specified for CSV/JSON files.\")", + "Plan(\"File compression type cannot be set for PARQUET, AVRO, or ARROW files.\")", format!("{err:?}") ); } diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md index f566b8342ec1..751159c305fc 100644 --- a/docs/source/user-guide/sql/ddl.md +++ b/docs/source/user-guide/sql/ddl.md @@ -79,7 +79,7 @@ LOCATION `file_type` is one of `CSV`, `PARQUET`, `AVRO` or `JSON` -`LOCATION ` specfies the location to find the data. It can be +`LOCATION ` specifies the location to find the data. It can be a path to a file or directory of partitioned files locally or on an object store. From 87f6f4ad692b87b612ad29a086b16e8db60855cc Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Mon, 4 Sep 2023 16:12:30 -0700 Subject: [PATCH 2/2] style: run rust fmt --- datafusion/sql/src/statement.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 638b25f72e46..c05f43d04d60 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -682,7 +682,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW") && file_compression_type != CompressionTypeVariant::UNCOMPRESSED { - plan_err!("File compression type cannot be set for PARQUET, AVRO, or ARROW files.")?; + plan_err!( + "File compression type cannot be set for PARQUET, AVRO, or ARROW files." + )?; } let schema = self.build_schema(columns)?;