From 1cf246b750d2583a3eab5eedfd8fbcd948044890 Mon Sep 17 00:00:00 2001
From: Thomas Cameron
Date: Wed, 28 Jun 2023 17:22:51 +0000
Subject: [PATCH] fix?

Write each batch to an index-named file inside a fresh `tempfile::tempdir()`
instead of hard-coding `/tmp/`, and thread each path through alongside its
file handle so `local_unpartitioned_file` can resolve the `ObjectMeta` for
every written file.

---
 .../src/datasource/file_format/parquet.rs | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index dbdf448646623..7094365dc0a8b 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -168,7 +168,6 @@ impl FileFormat for ParquetFormat {
             stack.sort_by(|(_, a), (_, b)| a.location.cmp(&b.location));
             stack.into_iter().map(|(a, _)| a).collect()
         };
-
         let schema = if self.skip_metadata(state.config_options()) {
             Schema::try_merge(clear_metadata(schemas))
         } else {
@@ -553,15 +552,11 @@ async fn fetch_statistics(
 
 #[cfg(test)]
 pub(crate) mod test_util {
-    use std::path::PathBuf;
-    use std::str::FromStr;
-
     use super::*;
     use crate::test::object_store::local_unpartitioned_file;
     use arrow::record_batch::RecordBatch;
    use parquet::arrow::ArrowWriter;
     use parquet::file::properties::WriterProperties;
-    use tempfile::NamedTempFile;
 
     /// How many rows per page should be written
     const ROWS_PER_PAGE: usize = 2;
@@ -574,14 +569,21 @@ pub(crate) mod test_util {
     pub async fn store_parquet(
         batches: Vec<RecordBatch>,
         multi_page: bool,
-    ) -> Result<(Vec<ObjectMeta>, Vec<NamedTempFile>)> {
+    ) -> Result<(Vec<ObjectMeta>, Vec<std::fs::File>)> {
         // Each batch writes to their own file
-        let pathbuf = PathBuf::from_str("/tmp/").unwrap();
+        let mut idx = 0;
+        // `into_path` persists the directory: the returned `ObjectMeta`
+        // paths would dangle once a `TempDir` guard went out of scope.
+        let tmpdir = tempfile::tempdir().unwrap().into_path();
         let files: Vec<_> = batches
             .into_iter()
             .map(|batch| {
-                let mut output = NamedTempFile::new_in(pathbuf.clone()).expect("creating temp file");
-
+                idx += 1;
+                let path = tmpdir.join(idx.to_string());
+                // `create`, not `open`: the file does not exist yet and the
+                // `ArrowWriter` needs a writable handle.
+                let mut output = std::fs::File::create(&path).expect("creating temp file");
+
                 let builder = WriterProperties::builder();
                 let props = if multi_page {
                     builder.set_data_page_row_count_limit(ROWS_PER_PAGE)
@@ -602,12 +604,15 @@
                     writer.write(&batch).expect("Writing batch");
                 };
                 writer.close().unwrap();
-                output
+                (path, output)
             })
             .collect();
 
-        let meta: Vec<_> = files.iter().map(local_unpartitioned_file).collect();
-        Ok((meta, files))
+        let meta: Vec<_> = files
+            .iter()
+            .map(|(path, _)| local_unpartitioned_file(path))
+            .collect();
+        Ok((meta, files.into_iter().map(|(_, f)| f).collect()))
     }
 
     //// write batches chunk_size rows at a time
@@ -1263,8 +1268,7 @@ mod tests {
         // https://github.com/apache/arrow-datafusion/issues/5779
         let fields1 = ["description", "code", "year"];
         let fields2 = ["year", "description", "code"];
-        let batch1 =
-            RecordBatch::try_from_iter(func(fields1)).unwrap();
+        let batch1 = RecordBatch::try_from_iter(func(fields1)).unwrap();
         let batch2 = RecordBatch::try_from_iter(func(fields2)).unwrap();
 
         let store = Arc::new(RequestCountingObjectStore::new(Arc::new(
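
Reviewer note (not part of the patch): a minimal sketch of how the revised
`store_parquet` helper is expected to be exercised. The test name and batch
construction are illustrative only, and the snippet assumes the test module's
usual imports (`arrow::array::{ArrayRef, Int32Array}`, `std::sync::Arc`).

    #[tokio::test]
    async fn store_parquet_smoke() -> Result<()> {
        // Hypothetical caller, for illustration only.
        let c1: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let batch = RecordBatch::try_from_iter(vec![("c1", c1)])?;

        // `meta` carries the on-disk location of each written file; the
        // returned `File` handles are the writers' outputs and are usually
        // ignored by callers.
        let (meta, _files) = store_parquet(vec![batch], false).await?;
        assert_eq!(meta.len(), 1);
        assert!(meta[0].size > 0);
        Ok(())
    }

Because the temp directory is persisted with `into_path`, the written files
are not cleaned up automatically; that trade-off keeps the returned
`ObjectMeta` paths valid for the duration of the test run.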