diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index 875c58ae447e9..4aadf066ea20c 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -46,6 +46,7 @@ use crate::datasource::physical_plan::{ParquetExec, SchemaAdapter};
 use crate::datasource::{create_max_min_accs, get_col_stats};
 use crate::error::Result;
 use crate::execution::context::SessionState;
+use crate::merge_and_sort_schema;
 use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator};
 use crate::physical_plan::{Accumulator, ExecutionPlan, Statistics};
 
@@ -163,11 +164,10 @@ impl FileFormat for ParquetFormat {
             .await?;
 
         let schema = if self.skip_metadata(state.config_options()) {
-            Schema::try_merge(clear_metadata(schemas))
+            merge_and_sort_schema(clear_metadata(schemas))?
         } else {
-            Schema::try_merge(schemas)
-        }?;
-
+            merge_and_sort_schema(schemas.into_iter())?
+        };
         Ok(Arc::new(schema))
     }
 
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 3e58923c3aad4..005d262bc0469 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -426,6 +426,8 @@ pub mod variable;
 
 // re-export dependencies from arrow-rs to minimise version maintenance for crate users
 pub use arrow;
+use arrow::error::Result;
+use arrow_schema::{Schema, SchemaBuilder};
 pub use parquet;
 
 // re-export DataFusion crates
@@ -449,3 +451,26 @@ doc_comment::doctest!(
     "../../../docs/source/user-guide/example-usage.md",
     user_guid_example_tests
 );
+
+/// Merges `schemas` into one schema whose top-level fields are sorted by name.
+///
+/// Sorting by name means the resulting field order does not depend on the
+/// order in which the input schemas are supplied.
+///
+/// # Errors
+///
+/// Returns the error from [`Schema::try_merge`] when the schemas cannot be merged.
+pub(crate) fn merge_and_sort_schema(
+    schemas: impl Iterator<Item = Schema>,
+) -> Result<Schema> {
+    let merged = Schema::try_merge(schemas)?;
+
+    // Use `fields()` rather than `all_fields()`: the latter also returns nested
+    // children, which would be hoisted into the top level of the result.
+    let mut fields: Vec<_> = merged.fields().iter().cloned().collect();
+    fields.sort_by(|a, b| a.name().cmp(b.name()));
+
+    let mut builder = SchemaBuilder::new();
+    builder.extend(fields);
+    Ok(builder.finish().with_metadata(merged.metadata))
+}