From 40a46588a247298f71050eef8a34a857d5e29558 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Oct 2023 06:32:18 -0400 Subject: [PATCH 1/3] remove more #cfg --- .../common/src/file_options/file_type.rs | 4 - datafusion/common/src/file_options/mod.rs | 11 +-- .../src/file_options/parquet_writer/mod.rs | 28 ++++++ .../parquet.rs} | 3 +- .../parquet_writer/parquet_stub.rs | 36 ++++++++ datafusion/common/src/test_util.rs | 1 - .../file_format/file_compression_type.rs | 2 - .../core/src/datasource/file_format/mod.rs | 2 +- .../src/datasource/file_format/options.rs | 1 - .../src/datasource/file_format/parquet/mod.rs | 30 +++++++ .../file_format/{ => parquet}/parquet.rs | 13 +-- .../file_format/parquet/parquet_stub.rs | 86 +++++++++++++++++++ .../core/src/datasource/listing/table.rs | 3 - .../src/datasource/listing_table_factory.rs | 4 - datafusion/core/src/datasource/mod.rs | 1 - datafusion/core/src/execution/context/mod.rs | 1 - datafusion/core/src/physical_planner.rs | 2 - datafusion/core/src/test_util/mod.rs | 1 - datafusion/proto/src/logical_plan/mod.rs | 61 ++++--------- 19 files changed, 211 insertions(+), 79 deletions(-) create mode 100644 datafusion/common/src/file_options/parquet_writer/mod.rs rename datafusion/common/src/file_options/{parquet_writer.rs => parquet_writer/parquet.rs} (99%) create mode 100644 datafusion/common/src/file_options/parquet_writer/parquet_stub.rs create mode 100644 datafusion/core/src/datasource/file_format/parquet/mod.rs rename datafusion/core/src/datasource/file_format/{ => parquet}/parquet.rs (99%) create mode 100644 datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs diff --git a/datafusion/common/src/file_options/file_type.rs b/datafusion/common/src/file_options/file_type.rs index a07f2e0cb847b..c83da387c25d9 100644 --- a/datafusion/common/src/file_options/file_type.rs +++ b/datafusion/common/src/file_options/file_type.rs @@ -48,7 +48,6 @@ pub enum FileType { /// Apache Avro file AVRO, /// Apache Parquet file - #[cfg(feature = "parquet")] PARQUET, /// CSV file CSV, @@ -61,7 +60,6 @@ impl GetExt for FileType { match self { FileType::ARROW => DEFAULT_ARROW_EXTENSION.to_owned(), FileType::AVRO => DEFAULT_AVRO_EXTENSION.to_owned(), - #[cfg(feature = "parquet")] FileType::PARQUET => DEFAULT_PARQUET_EXTENSION.to_owned(), FileType::CSV => DEFAULT_CSV_EXTENSION.to_owned(), FileType::JSON => DEFAULT_JSON_EXTENSION.to_owned(), @@ -74,7 +72,6 @@ impl Display for FileType { let out = match self { FileType::CSV => "csv", FileType::JSON => "json", - #[cfg(feature = "parquet")] FileType::PARQUET => "parquet", FileType::AVRO => "avro", FileType::ARROW => "arrow", @@ -91,7 +88,6 @@ impl FromStr for FileType { match s.as_str() { "ARROW" => Ok(FileType::ARROW), "AVRO" => Ok(FileType::AVRO), - #[cfg(feature = "parquet")] "PARQUET" => Ok(FileType::PARQUET), "CSV" => Ok(FileType::CSV), "JSON" | "NDJSON" => Ok(FileType::JSON), diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs index b7c1341e30460..c6a5660237a01 100644 --- a/datafusion/common/src/file_options/mod.rs +++ b/datafusion/common/src/file_options/mod.rs @@ -22,7 +22,6 @@ pub mod avro_writer; pub mod csv_writer; pub mod file_type; pub mod json_writer; -#[cfg(feature = "parquet")] pub mod parquet_writer; pub(crate) mod parse_utils; @@ -38,14 +37,13 @@ use crate::{ DataFusionError, FileType, Result, }; -#[cfg(feature = "parquet")] -use self::parquet_writer::ParquetWriterOptions; - use self::{ arrow_writer::ArrowWriterOptions, avro_writer::AvroWriterOptions, csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions, }; +use self::parquet_writer::ParquetWriterOptions; + /// Represents a single arbitrary setting in a /// [StatementOptions] where OptionTuple.0 determines /// the specific setting to be modified and OptionTuple.1 @@ -148,7 +146,6 @@ impl StatementOptions { /// plus any DataFusion specific writing options (e.g. CSV compression) #[derive(Clone, Debug)] pub enum FileTypeWriterOptions { - #[cfg(feature = "parquet")] Parquet(ParquetWriterOptions), CSV(CsvWriterOptions), JSON(JsonWriterOptions), @@ -168,7 +165,6 @@ impl FileTypeWriterOptions { let options = (config_defaults, statement_options); let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] FileType::PARQUET => { FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) } @@ -198,7 +194,6 @@ impl FileTypeWriterOptions { let options = (config_defaults, &empty_statement); let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] FileType::PARQUET => { FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) } @@ -221,7 +216,6 @@ impl FileTypeWriterOptions { /// Tries to extract ParquetWriterOptions from this FileTypeWriterOptions enum. /// Returns an error if a different type from parquet is set. - #[cfg(feature = "parquet")] pub fn try_into_parquet(&self) -> Result<&ParquetWriterOptions> { match self { FileTypeWriterOptions::Parquet(opt) => Ok(opt), @@ -288,7 +282,6 @@ impl Display for FileTypeWriterOptions { FileTypeWriterOptions::Avro(_) => "AvroWriterOptions", FileTypeWriterOptions::CSV(_) => "CsvWriterOptions", FileTypeWriterOptions::JSON(_) => "JsonWriterOptions", - #[cfg(feature = "parquet")] FileTypeWriterOptions::Parquet(_) => "ParquetWriterOptions", }; write!(f, "{}", name) diff --git a/datafusion/common/src/file_options/parquet_writer/mod.rs b/datafusion/common/src/file_options/parquet_writer/mod.rs new file mode 100644 index 0000000000000..63d4d9828b1b2 --- /dev/null +++ b/datafusion/common/src/file_options/parquet_writer/mod.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[cfg(feature = "parquet")] +mod parquet; + +#[cfg(feature = "parquet")] +pub use parquet::*; + +#[cfg(not(feature = "parquet"))] +mod parquet_stub; + +#[cfg(not(feature = "parquet"))] +pub use parquet_stub::*; diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer/parquet.rs similarity index 99% rename from datafusion/common/src/file_options/parquet_writer.rs rename to datafusion/common/src/file_options/parquet_writer/parquet.rs index 80fa023587eef..b28d3972f438d 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer/parquet.rs @@ -21,8 +21,7 @@ use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; use crate::{config::ConfigOptions, DataFusionError, Result}; -use super::StatementOptions; - +use crate::file_options::StatementOptions; use parquet::{ basic::{BrotliLevel, GzipLevel, ZstdLevel}, file::properties::{EnabledStatistics, WriterVersion}, diff --git a/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs b/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs new file mode 100644 index 0000000000000..f061cf2aad089 --- /dev/null +++ b/datafusion/common/src/file_options/parquet_writer/parquet_stub.rs @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::config::ConfigOptions; +use crate::error::_not_impl_err; +use crate::file_options::StatementOptions; +use crate::{DataFusionError, Result}; + +/// Stub implementation of `ParquetWriterOptions` that always returns a +/// NotYetImplemented error used when parquet feature is not activated. +#[derive(Clone, Debug)] +pub struct ParquetWriterOptions {} + +impl TryFrom<(&ConfigOptions, &StatementOptions)> for ParquetWriterOptions { + type Error = DataFusionError; + + fn try_from(_: (&ConfigOptions, &StatementOptions)) -> Result { + _not_impl_err!( + "Parquet support is not enabled. Hint enable the `parquet` feature flag" + ) + } +} diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 9a44337821570..60f1df7fd11ac 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -180,7 +180,6 @@ pub fn arrow_test_data() -> String { /// let filename = format!("{}/binary.parquet", testdata); /// assert!(std::path::PathBuf::from(filename).exists()); /// ``` -#[cfg(feature = "parquet")] pub fn parquet_test_data() -> String { match get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data") { Ok(pb) => pb.display().to_string(), diff --git a/datafusion/core/src/datasource/file_format/file_compression_type.rs b/datafusion/core/src/datasource/file_format/file_compression_type.rs index 3dac7c293050c..d116418a7b89c 100644 --- a/datafusion/core/src/datasource/file_format/file_compression_type.rs +++ b/datafusion/core/src/datasource/file_format/file_compression_type.rs @@ -243,7 +243,6 @@ impl FileTypeExt for FileType { "FileCompressionType can be specified for CSV/JSON FileType.".into(), )), }, - #[cfg(feature = "parquet")] FileType::PARQUET => match c.variant { UNCOMPRESSED => Ok(ext), _ => Err(DataFusionError::Internal( @@ -285,7 +284,6 @@ mod tests { let mut ty_ext_tuple = vec![]; ty_ext_tuple.push((FileType::AVRO, ".avro")); - #[cfg(feature = "parquet")] ty_ext_tuple.push((FileType::PARQUET, ".parquet")); // Cannot specify compression for these file types diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index b541e2a1d44c1..04bd9ddeb6aa6 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -27,8 +27,8 @@ pub mod csv; pub mod file_compression_type; pub mod json; pub mod options; -#[cfg(feature = "parquet")] pub mod parquet; + pub mod write; use std::any::Any; diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index 41a70e6d2f8fb..ee1ab7377b114 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -25,7 +25,6 @@ use datafusion_common::{plan_err, DataFusionError}; use crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::listing::{ListingTableInsertMode, ListingTableUrl}; diff --git a/datafusion/core/src/datasource/file_format/parquet/mod.rs b/datafusion/core/src/datasource/file_format/parquet/mod.rs new file mode 100644 index 0000000000000..644fcf8d81aa4 --- /dev/null +++ b/datafusion/core/src/datasource/file_format/parquet/mod.rs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! FileFormat for parquet + +/// If parquet is enabled, use actual implementation +#[cfg(feature = "parquet")] +mod parquet; +#[cfg(feature = "parquet")] +pub use parquet::*; + +/// If parquet is not enabled, use dummy implementation +#[cfg(not(feature = "parquet"))] +mod parquet_stub; +#[cfg(not(feature = "parquet"))] +pub use parquet_stub::ParquetFormat; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet/parquet.rs similarity index 99% rename from datafusion/core/src/datasource/file_format/parquet.rs rename to datafusion/core/src/datasource/file_format/parquet/parquet.rs index 62867c0e2b38d..982b87c58e009 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet/parquet.rs @@ -54,18 +54,21 @@ use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics as ParquetStatistics; -use super::write::demux::start_demuxer_task; -use super::write::{create_writer, AbortableWrite, FileWriterMode}; -use super::{FileFormat, FileScanConfig}; use crate::arrow::array::{ BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, }; use crate::arrow::datatypes::DataType; use crate::config::ConfigOptions; +use crate::datasource::file_format::write::demux::start_demuxer_task; +use crate::datasource::file_format::write::{ + create_writer, AbortableWrite, FileWriterMode, +}; +use crate::datasource::file_format::FileFormat; use crate::datasource::get_col_stats; use crate::datasource::physical_plan::{ - FileGroupDisplay, FileMeta, FileSinkConfig, ParquetExec, SchemaAdapter, + FileGroupDisplay, FileMeta, FileScanConfig, FileSinkConfig, ParquetExec, + SchemaAdapter, }; use crate::error::Result; @@ -1206,7 +1209,6 @@ pub(crate) mod test_util { #[cfg(test)] mod tests { - use super::super::test_util::scan_format; use crate::physical_plan::collect; use std::fmt::{Display, Formatter}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -1214,6 +1216,7 @@ mod tests { use super::*; use crate::datasource::file_format::parquet::test_util::store_parquet; + use crate::datasource::file_format::test_util::scan_format; use crate::physical_plan::metrics::MetricValue; use crate::prelude::{SessionConfig, SessionContext}; use arrow::array::{Array, ArrayRef, StringArray}; diff --git a/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs b/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs new file mode 100644 index 0000000000000..78312ac2e24c1 --- /dev/null +++ b/datafusion/core/src/datasource/file_format/parquet/parquet_stub.rs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datasource::file_format::FileFormat; +use crate::datasource::physical_plan::FileScanConfig; +use crate::execution::context::SessionState; +use arrow_schema::SchemaRef; +use async_trait::async_trait; +use datafusion_common::{ + not_impl_datafusion_err, DataFusionError, FileType, Result, Statistics, +}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_plan::ExecutionPlan; +use object_store::{ObjectMeta, ObjectStore}; +use std::any::Any; +use std::sync::Arc; + +/// Stub implementation of `ParquetFormat` that always returns a NotYetImplemented error. +#[derive(Debug, Default)] +pub struct ParquetFormat; + +impl ParquetFormat { + /// Create a new instance of the Parquet format + pub fn new() -> Self { + Self + } +} + +fn nyi_error() -> DataFusionError { + not_impl_datafusion_err!( + "Parquet support is not enabled. Hint enable the `parquet` feature flag" + ) +} + +#[async_trait] +impl FileFormat for ParquetFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema( + &self, + _: &SessionState, + _: &Arc, + _: &[ObjectMeta], + ) -> Result { + Err(nyi_error()) + } + + async fn infer_stats( + &self, + _: &SessionState, + _: &Arc, + _: SchemaRef, + _: &ObjectMeta, + ) -> Result { + Err(nyi_error()) + } + + async fn create_physical_plan( + &self, + _: &SessionState, + _: FileScanConfig, + _: Option<&Arc>, + ) -> Result> { + Err(nyi_error()) + } + + fn file_type(&self) -> FileType { + FileType::PARQUET + } +} diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 822a78a5522a6..197a8bf826f2f 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -23,7 +23,6 @@ use std::{any::Any, sync::Arc}; use super::helpers::{expr_applicable_for_cols, pruned_partition_list, split_files}; use super::PartitionedFile; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::{ file_format::{ @@ -151,7 +150,6 @@ impl ListingTableConfig { FileType::JSON => Arc::new( JsonFormat::default().with_file_compression_type(file_compression_type), ), - #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), }; @@ -1021,7 +1019,6 @@ mod tests { use std::fs::File; use super::*; - #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::{provider_as_source, MemTable}; use crate::execution::options::ArrowReadOptions; diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 26f40518979a2..3d9a509cedba2 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use super::listing::ListingTableInsertMode; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::{ arrow::ArrowFormat, avro::AvroFormat, csv::CsvFormat, @@ -80,7 +79,6 @@ impl TableProviderFactory for ListingTableFactory { .with_delimiter(cmd.delimiter as u8) .with_file_compression_type(file_compression_type), ), - #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), FileType::AVRO => Arc::new(AvroFormat), FileType::JSON => Arc::new( @@ -159,7 +157,6 @@ impl TableProviderFactory for ListingTableFactory { Some(mode) => ListingTableInsertMode::from_str(mode.as_str()), None => match file_type { FileType::CSV => Ok(ListingTableInsertMode::AppendToFile), - #[cfg(feature = "parquet")] FileType::PARQUET => Ok(ListingTableInsertMode::AppendNewFiles), FileType::AVRO => Ok(ListingTableInsertMode::AppendNewFiles), FileType::JSON => Ok(ListingTableInsertMode::AppendToFile), @@ -199,7 +196,6 @@ impl TableProviderFactory for ListingTableFactory { json_writer_options.compression = cmd.file_compression_type; FileTypeWriterOptions::JSON(json_writer_options) } - #[cfg(feature = "parquet")] FileType::PARQUET => file_type_writer_options, FileType::ARROW => file_type_writer_options, FileType::AVRO => file_type_writer_options, diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 3ace2c239852d..455818056f2c0 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -42,6 +42,5 @@ pub use self::memory::MemTable; pub use self::provider::TableProvider; pub use self::view::ViewTable; pub use crate::logical_expr::TableType; -#[cfg(feature = "parquet")] pub(crate) use statistics::get_col_stats; pub use statistics::get_statistics_with_limit; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index d523c39ee01e4..a180597d33fb1 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1267,7 +1267,6 @@ impl SessionState { // Create table_factories for all default formats let mut table_factories: HashMap> = HashMap::new(); - #[cfg(feature = "parquet")] table_factories.insert("PARQUET".into(), Arc::new(ListingTableFactory::new())); table_factories.insert("CSV".into(), Arc::new(ListingTableFactory::new())); table_factories.insert("JSON".into(), Arc::new(ListingTableFactory::new())); diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index f941e88f3a36d..419f62cff6647 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -25,7 +25,6 @@ use crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::avro::AvroFormat; use crate::datasource::file_format::csv::CsvFormat; use crate::datasource::file_format::json::JsonFormat; -#[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::write::FileWriterMode; use crate::datasource::file_format::FileFormat; @@ -600,7 +599,6 @@ impl DefaultPhysicalPlanner { let sink_format: Arc = match file_format { FileType::CSV => Arc::new(CsvFormat::default()), - #[cfg(feature = "parquet")] FileType::PARQUET => Arc::new(ParquetFormat::default()), FileType::JSON => Arc::new(JsonFormat::default()), FileType::AVRO => Arc::new(AvroFormat {} ), diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index c6b43de0c18d5..34c1a566fd7cb 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -54,7 +54,6 @@ use async_trait::async_trait; use futures::Stream; // backwards compatibility -#[cfg(feature = "parquet")] pub use datafusion_common::test_util::parquet_test_data; pub use datafusion_common::test_util::{arrow_test_data, get_data_dir}; diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index e426c598523e3..898b83e9f5323 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -31,7 +31,6 @@ use crate::{ }; use arrow::datatypes::{DataType, Schema, SchemaRef}; -#[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::{ datasource::{ @@ -336,7 +335,6 @@ impl AsLogicalPlan for LogicalPlanNode { "logical_plan::from_proto() Unsupported file format '{self:?}'" )) })? { - #[cfg(feature = "parquet")] &FileFormatType::Parquet(protobuf::ParquetFormat {}) => { Arc::new(ParquetFormat::default()) } @@ -850,49 +848,28 @@ impl AsLogicalPlan for LogicalPlanNode { if let Some(listing_table) = source.downcast_ref::() { let any = listing_table.options().format.as_any(); - let file_format_type = { - let mut maybe_some_type = None; - - #[cfg(feature = "parquet")] - if any.is::() { - maybe_some_type = - Some(FileFormatType::Parquet(protobuf::ParquetFormat {})) - }; - - if let Some(csv) = any.downcast_ref::() { - maybe_some_type = - Some(FileFormatType::Csv(protobuf::CsvFormat { - delimiter: byte_to_string( - csv.delimiter(), - "delimiter", - )?, - has_header: csv.has_header(), - quote: byte_to_string(csv.quote(), "quote")?, - optional_escape: if let Some(escape) = csv.escape() { - Some( - protobuf::csv_format::OptionalEscape::Escape( - byte_to_string(escape, "escape")?, - ), - ) - } else { - None - }, - })) - } - - if any.is::() { - maybe_some_type = - Some(FileFormatType::Avro(protobuf::AvroFormat {})) - } - - if let Some(file_format_type) = maybe_some_type { - file_format_type - } else { - return Err(proto_error(format!( + let file_format_type = if any.is::() { + FileFormatType::Parquet(protobuf::ParquetFormat {}) + } else if let Some(csv) = any.downcast_ref::() { + FileFormatType::Csv(protobuf::CsvFormat { + delimiter: byte_to_string(csv.delimiter(), "delimiter")?, + has_header: csv.has_header(), + quote: byte_to_string(csv.quote(), "quote")?, + optional_escape: if let Some(escape) = csv.escape() { + Some(protobuf::csv_format::OptionalEscape::Escape( + byte_to_string(escape, "escape")?, + )) + } else { + None + }, + }) + } else if any.is::() { + FileFormatType::Avro(protobuf::AvroFormat {}) + } else { + return Err(proto_error(format!( "Error converting file format, {:?} is invalid as a datafusion format.", listing_table.options().format ))); - } }; let options = listing_table.options(); From 73636c7714d9742a056412d067fdda529188a529 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Oct 2023 07:28:19 -0400 Subject: [PATCH 2/3] clippy --- datafusion/core/src/datasource/file_format/parquet/mod.rs | 4 ++-- .../file_format/parquet/{parquet.rs => parquet_impl.rs} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename datafusion/core/src/datasource/file_format/parquet/{parquet.rs => parquet_impl.rs} (100%) diff --git a/datafusion/core/src/datasource/file_format/parquet/mod.rs b/datafusion/core/src/datasource/file_format/parquet/mod.rs index 644fcf8d81aa4..6f693df8eda42 100644 --- a/datafusion/core/src/datasource/file_format/parquet/mod.rs +++ b/datafusion/core/src/datasource/file_format/parquet/mod.rs @@ -19,9 +19,9 @@ /// If parquet is enabled, use actual implementation #[cfg(feature = "parquet")] -mod parquet; +mod parquet_impl; #[cfg(feature = "parquet")] -pub use parquet::*; +pub use parquet_impl::*; /// If parquet is not enabled, use dummy implementation #[cfg(not(feature = "parquet"))] diff --git a/datafusion/core/src/datasource/file_format/parquet/parquet.rs b/datafusion/core/src/datasource/file_format/parquet/parquet_impl.rs similarity index 100% rename from datafusion/core/src/datasource/file_format/parquet/parquet.rs rename to datafusion/core/src/datasource/file_format/parquet/parquet_impl.rs From 63fbbc6ed92e1bb68970cc6a96ac632f13252df8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Oct 2023 09:29:55 -0400 Subject: [PATCH 3/3] fix msrv check --- datafusion/common/src/file_options/parquet_writer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/file_options/parquet_writer/mod.rs b/datafusion/common/src/file_options/parquet_writer/mod.rs index 63d4d9828b1b2..11a2c8c84e040 100644 --- a/datafusion/common/src/file_options/parquet_writer/mod.rs +++ b/datafusion/common/src/file_options/parquet_writer/mod.rs @@ -19,7 +19,7 @@ mod parquet; #[cfg(feature = "parquet")] -pub use parquet::*; +pub use self::parquet::*; #[cfg(not(feature = "parquet"))] mod parquet_stub;