diff --git a/datafusion/common/src/file_options/file_type.rs b/datafusion/common/src/file_options/file_type.rs index a07f2e0cb847b..c83da387c25d9 100644 --- a/datafusion/common/src/file_options/file_type.rs +++ b/datafusion/common/src/file_options/file_type.rs @@ -48,7 +48,6 @@ pub enum FileType { /// Apache Avro file AVRO, /// Apache Parquet file - #[cfg(feature = "parquet")] PARQUET, /// CSV file CSV, @@ -61,7 +60,6 @@ impl GetExt for FileType { match self { FileType::ARROW => DEFAULT_ARROW_EXTENSION.to_owned(), FileType::AVRO => DEFAULT_AVRO_EXTENSION.to_owned(), - #[cfg(feature = "parquet")] FileType::PARQUET => DEFAULT_PARQUET_EXTENSION.to_owned(), FileType::CSV => DEFAULT_CSV_EXTENSION.to_owned(), FileType::JSON => DEFAULT_JSON_EXTENSION.to_owned(), @@ -74,7 +72,6 @@ impl Display for FileType { let out = match self { FileType::CSV => "csv", FileType::JSON => "json", - #[cfg(feature = "parquet")] FileType::PARQUET => "parquet", FileType::AVRO => "avro", FileType::ARROW => "arrow", @@ -91,7 +88,6 @@ impl FromStr for FileType { match s.as_str() { "ARROW" => Ok(FileType::ARROW), "AVRO" => Ok(FileType::AVRO), - #[cfg(feature = "parquet")] "PARQUET" => Ok(FileType::PARQUET), "CSV" => Ok(FileType::CSV), "JSON" | "NDJSON" => Ok(FileType::JSON), diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs index b7c1341e30460..029d0086623cc 100644 --- a/datafusion/common/src/file_options/mod.rs +++ b/datafusion/common/src/file_options/mod.rs @@ -22,7 +22,6 @@ pub mod avro_writer; pub mod csv_writer; pub mod file_type; pub mod json_writer; -#[cfg(feature = "parquet")] pub mod parquet_writer; pub(crate) mod parse_utils; @@ -38,14 +37,13 @@ use crate::{ DataFusionError, FileType, Result, }; -#[cfg(feature = "parquet")] -use self::parquet_writer::ParquetWriterOptions; - use self::{ arrow_writer::ArrowWriterOptions, avro_writer::AvroWriterOptions, csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions, }; +use 
self::parquet_writer::ParquetWriterOptions; + /// Represents a single arbitrary setting in a /// [StatementOptions] where OptionTuple.0 determines /// the specific setting to be modified and OptionTuple.1 @@ -148,7 +146,6 @@ impl StatementOptions { /// plus any DataFusion specific writing options (e.g. CSV compression) #[derive(Clone, Debug)] pub enum FileTypeWriterOptions { - #[cfg(feature = "parquet")] Parquet(ParquetWriterOptions), CSV(CsvWriterOptions), JSON(JsonWriterOptions), @@ -168,7 +165,6 @@ impl FileTypeWriterOptions { let options = (config_defaults, statement_options); let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] FileType::PARQUET => { FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) } @@ -198,7 +194,6 @@ impl FileTypeWriterOptions { let options = (config_defaults, &empty_statement); let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] FileType::PARQUET => { FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) } @@ -288,7 +283,6 @@ impl Display for FileTypeWriterOptions { FileTypeWriterOptions::Avro(_) => "AvroWriterOptions", FileTypeWriterOptions::CSV(_) => "CsvWriterOptions", FileTypeWriterOptions::JSON(_) => "JsonWriterOptions", - #[cfg(feature = "parquet")] FileTypeWriterOptions::Parquet(_) => "ParquetWriterOptions", }; write!(f, "{}", name) diff --git a/datafusion/common/src/file_options/parquet_writer/mod.rs b/datafusion/common/src/file_options/parquet_writer/mod.rs new file mode 100644 index 0000000000000..63d4d9828b1b2 --- /dev/null +++ b/datafusion/common/src/file_options/parquet_writer/mod.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[cfg(feature = "parquet")] +mod parquet; + +#[cfg(feature = "parquet")] +pub use parquet::*; + +#[cfg(not(feature = "parquet"))] +mod parquet_stub; + +#[cfg(not(feature = "parquet"))] +pub use parquet_stub::*; diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer/parquet.rs similarity index 99% rename from datafusion/common/src/file_options/parquet_writer.rs rename to datafusion/common/src/file_options/parquet_writer/parquet.rs index 80fa023587eef..b28d3972f438d 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer/parquet.rs @@ -21,8 +21,7 @@ use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; use crate::{config::ConfigOptions, DataFusionError, Result}; -use super::StatementOptions; - +use crate::file_options::StatementOptions; use parquet::{ basic::{BrotliLevel, GzipLevel, ZstdLevel}, file::properties::{EnabledStatistics, WriterVersion}, diff --git a/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs b/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs new file mode 100644 index 0000000000000..8fffbb8b1fc09 --- /dev/null +++ b/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::config::ConfigOptions; +use crate::file_options::StatementOptions; +use crate::{not_impl_err, DataFusionError, Result}; + +/// Stub for `ParquetWriterOptions`, compiled in place of the real type when the +/// `parquet` feature is disabled; `try_from` always returns a not-implemented error. +#[derive(Clone, Debug)] +pub struct ParquetWriterOptions {} + +impl TryFrom<(&ConfigOptions, &StatementOptions)> for ParquetWriterOptions { + type Error = DataFusionError; + + fn try_from(_: (&ConfigOptions, &StatementOptions)) -> Result<Self> { + not_impl_err!( + "Parquet format is not enabled, Hint enable the `parquet` feature flag" + ) + } +} diff --git a/datafusion/core/src/datasource/file_format/file_compression_type.rs b/datafusion/core/src/datasource/file_format/file_compression_type.rs index 3dac7c293050c..50dd0e3ac034a 100644 --- a/datafusion/core/src/datasource/file_format/file_compression_type.rs +++ b/datafusion/core/src/datasource/file_format/file_compression_type.rs @@ -243,7 +243,6 @@ impl FileTypeExt for FileType { "FileCompressionType can be specified for CSV/JSON FileType.".into(), )), }, - #[cfg(feature = "parquet")] FileType::PARQUET => match c.variant { UNCOMPRESSED => Ok(ext), _ => Err(DataFusionError::Internal( diff --git a/datafusion/core/src/datasource/file_format/mod.rs
b/datafusion/core/src/datasource/file_format/mod.rs index b541e2a1d44c1..04bd9ddeb6aa6 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -27,8 +27,8 @@ pub mod csv; pub mod file_compression_type; pub mod json; pub mod options; -#[cfg(feature = "parquet")] pub mod parquet; + pub mod write; use std::any::Any; diff --git a/datafusion/core/src/datasource/file_format/parquet/mod.rs b/datafusion/core/src/datasource/file_format/parquet/mod.rs new file mode 100644 index 0000000000000..644fcf8d81aa4 --- /dev/null +++ b/datafusion/core/src/datasource/file_format/parquet/mod.rs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
FileFormat for parquet + +/// If parquet is enabled, use actual implementation +#[cfg(feature = "parquet")] +mod parquet; +#[cfg(feature = "parquet")] +pub use parquet::*; + +/// If parquet is not enabled, use dummy implementation +#[cfg(not(feature = "parquet"))] +mod parquet_stub; +#[cfg(not(feature = "parquet"))] +pub use parquet_stub::ParquetFormat; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet/parquet.rs similarity index 99% rename from datafusion/core/src/datasource/file_format/parquet.rs rename to datafusion/core/src/datasource/file_format/parquet/parquet.rs index 62867c0e2b38d..982b87c58e009 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet/parquet.rs @@ -54,18 +54,21 @@ use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics as ParquetStatistics; -use super::write::demux::start_demuxer_task; -use super::write::{create_writer, AbortableWrite, FileWriterMode}; -use super::{FileFormat, FileScanConfig}; use crate::arrow::array::{ BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, }; use crate::arrow::datatypes::DataType; use crate::config::ConfigOptions; +use crate::datasource::file_format::write::demux::start_demuxer_task; +use crate::datasource::file_format::write::{ + create_writer, AbortableWrite, FileWriterMode, +}; +use crate::datasource::file_format::FileFormat; use crate::datasource::get_col_stats; use crate::datasource::physical_plan::{ - FileGroupDisplay, FileMeta, FileSinkConfig, ParquetExec, SchemaAdapter, + FileGroupDisplay, FileMeta, FileScanConfig, FileSinkConfig, ParquetExec, + SchemaAdapter, }; use crate::error::Result; @@ -1206,7 +1209,6 @@ pub(crate) mod test_util { #[cfg(test)] mod tests { - use super::super::test_util::scan_format; use crate::physical_plan::collect; use std::fmt::{Display, 
Formatter}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -1214,6 +1216,7 @@ mod tests { use super::*; use crate::datasource::file_format::parquet::test_util::store_parquet; + use crate::datasource::file_format::test_util::scan_format; use crate::physical_plan::metrics::MetricValue; use crate::prelude::{SessionConfig, SessionContext}; use arrow::array::{Array, ArrayRef, StringArray}; diff --git a/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs b/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs new file mode 100644 index 0000000000000..efcc946f77d31 --- /dev/null +++ b/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datasource::file_format::FileFormat; +use crate::datasource::physical_plan::FileScanConfig; +use crate::execution::context::SessionState; +use arrow_schema::SchemaRef; +use async_trait::async_trait; +use datafusion_common::{DataFusionError, FileType, Result, Statistics}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_plan::ExecutionPlan; +use object_store::{ObjectMeta, ObjectStore}; +use std::any::Any; +use std::sync::Arc; + +/// Stub `ParquetFormat` for builds without the `parquet` feature: schema inference, statistics and planning all return `DataFusionError::NotImplemented`. +#[derive(Debug, Default)] +pub struct ParquetFormat; + +impl ParquetFormat { + /// Create a new instance of the Parquet format + pub fn new() -> Self { + Self + } +} + +fn nyi_error() -> DataFusionError { + DataFusionError::NotImplemented( + "Parquet support not enabled. Hint enable the `parquet` crate feature".into(), + ) +} + +#[async_trait] +impl FileFormat for ParquetFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema( + &self, + _: &SessionState, + _: &Arc<dyn ObjectStore>, + _: &[ObjectMeta], + ) -> Result<SchemaRef> { + Err(nyi_error()) + } + + async fn infer_stats( + &self, + _: &SessionState, + _: &Arc<dyn ObjectStore>, + _: SchemaRef, + _: &ObjectMeta, + ) -> Result<Statistics> { + Err(nyi_error()) + } + + async fn create_physical_plan( + &self, + _: &SessionState, + _: FileScanConfig, + _: Option<&Arc<dyn PhysicalExpr>>, + ) -> Result<Arc<dyn ExecutionPlan>> { + Err(nyi_error()) + } + + fn file_type(&self) -> FileType { + FileType::PARQUET + } +} diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 822a78a5522a6..7247ee874945b 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -153,6 +153,11 @@ impl ListingTableConfig { ), #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), + #[cfg(not(feature = "parquet"))] + FileType::PARQUET => return Err(DataFusionError::NotImplemented( + "Parquet format is not enabled, Hint
enable the `parquet` feature flag" + .to_string(), + )), }; Ok((file_format, ext)) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 26f40518979a2..3d9a509cedba2 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use super::listing::ListingTableInsertMode; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::{ arrow::ArrowFormat, avro::AvroFormat, csv::CsvFormat, @@ -80,7 +79,6 @@ impl TableProviderFactory for ListingTableFactory { .with_delimiter(cmd.delimiter as u8) .with_file_compression_type(file_compression_type), ), - #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), FileType::AVRO => Arc::new(AvroFormat), FileType::JSON => Arc::new( @@ -159,7 +157,6 @@ impl TableProviderFactory for ListingTableFactory { Some(mode) => ListingTableInsertMode::from_str(mode.as_str()), None => match file_type { FileType::CSV => Ok(ListingTableInsertMode::AppendToFile), - #[cfg(feature = "parquet")] FileType::PARQUET => Ok(ListingTableInsertMode::AppendNewFiles), FileType::AVRO => Ok(ListingTableInsertMode::AppendNewFiles), FileType::JSON => Ok(ListingTableInsertMode::AppendToFile), @@ -199,7 +196,6 @@ impl TableProviderFactory for ListingTableFactory { json_writer_options.compression = cmd.file_compression_type; FileTypeWriterOptions::JSON(json_writer_options) } - #[cfg(feature = "parquet")] FileType::PARQUET => file_type_writer_options, FileType::ARROW => file_type_writer_options, FileType::AVRO => file_type_writer_options, diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index f941e88f3a36d..419f62cff6647 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -25,7 +25,6 @@ use 
crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::avro::AvroFormat; use crate::datasource::file_format::csv::CsvFormat; use crate::datasource::file_format::json::JsonFormat; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::write::FileWriterMode; use crate::datasource::file_format::FileFormat; @@ -600,7 +599,6 @@ impl DefaultPhysicalPlanner { let sink_format: Arc = match file_format { FileType::CSV => Arc::new(CsvFormat::default()), - #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), FileType::JSON => Arc::new(JsonFormat::default()), FileType::AVRO => Arc::new(AvroFormat {} ),