Commit 1cf246b

fix?
thomas-k-cameron committed Jun 28, 2023
1 parent 2d0f9bb commit 1cf246b
Showing 1 changed file with 14 additions and 14 deletions.
datafusion/core/src/datasource/file_format/parquet.rs: 14 additions, 14 deletions
@@ -168,7 +168,6 @@ impl FileFormat for ParquetFormat {
         stack.sort_by(|(_, a), (_, b)| a.location.cmp(&b.location));
         stack.into_iter().map(|(a, _)| a).collect()
     };
-
 
     let schema = if self.skip_metadata(state.config_options()) {
         Schema::try_merge(clear_metadata(schemas))
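Context for the hunk above: the surrounding code collects one schema per file and merges them, so the behavior in play is arrow's Schema::try_merge. A minimal sketch of that call (field names and types are chosen here purely for illustration):

use arrow::datatypes::{DataType, Field, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Two Parquet files may report the same columns in different orders;
    // try_merge unifies them into one schema and errors on real conflicts.
    let s1 = Schema::new(vec![
        Field::new("description", DataType::Utf8, true),
        Field::new("year", DataType::Int32, true),
    ]);
    let s2 = Schema::new(vec![
        Field::new("year", DataType::Int32, true),
        Field::new("description", DataType::Utf8, true),
    ]);
    let merged = Schema::try_merge([s1, s2])?;
    assert_eq!(merged.fields().len(), 2); // fields are deduplicated, not repeated
    Ok(())
}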
@@ -553,15 +552,11 @@ async fn fetch_statistics(
 
 #[cfg(test)]
 pub(crate) mod test_util {
-    use std::path::PathBuf;
-    use std::str::FromStr;
-
     use super::*;
     use crate::test::object_store::local_unpartitioned_file;
     use arrow::record_batch::RecordBatch;
     use parquet::arrow::ArrowWriter;
     use parquet::file::properties::WriterProperties;
-    use tempfile::NamedTempFile;
 
     /// How many rows per page should be written
     const ROWS_PER_PAGE: usize = 2;
@@ -574,14 +569,17 @@ pub(crate) mod test_util {
     pub async fn store_parquet(
         batches: Vec<RecordBatch>,
         multi_page: bool,
-    ) -> Result<(Vec<ObjectMeta>, Vec<NamedTempFile>)> {
+    ) -> Result<(Vec<ObjectMeta>, Vec<std::fs::File>)> {
         // Each batch writes to their own file
-        let pathbuf = PathBuf::from_str("/tmp/").unwrap();
+        let mut idx = 0;
+        let tmpdir = tempfile::tempdir().unwrap();
         let files: Vec<_> = batches
             .into_iter()
             .map(|batch| {
-                let mut output = NamedTempFile::new_in(pathbuf.clone()).expect("creating temp file");
+                idx += 1;
+                let path = tmpdir.path().join(idx.to_string());
+                let mut output = std::fs::File::open(&path).unwrap();
 
                 let builder = WriterProperties::builder();
                 let props = if multi_page {
                     builder.set_data_page_row_count_limit(ROWS_PER_PAGE)
@@ -602,12 +600,15 @@
                     writer.write(&batch).expect("Writing batch");
                 };
                 writer.close().unwrap();
-                output
+                (path, output)
             })
             .collect();
 
-        let meta: Vec<_> = files.iter().map(local_unpartitioned_file).collect();
-        Ok((meta, files))
+        let meta: Vec<_> = files
+            .iter()
+            .map(|(path, _)| local_unpartitioned_file(path))
+            .collect();
+        Ok((meta, files.into_iter().map(|(_, f)| f).collect()))
     }
 
     //// write batches chunk_size rows at a time
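The two hunks above swap NamedTempFile-in-/tmp for numbered files inside a tempfile::tempdir(). One caveat worth knowing when reading the new code: std::fs::File::open opens an existing file read-only, so a standalone version of this pattern that actually writes Parquet bytes needs File::create instead. A minimal sketch under that assumption (the file name and batch contents are illustrative):

use std::fs::File;
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use arrow::record_batch::RecordBatch;
use parquet::arrow::ArrowWriter;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The directory and its contents are removed when `tmpdir` drops.
    let tmpdir = tempfile::tempdir()?;
    let path = tmpdir.path().join("1");

    let batch = RecordBatch::try_from_iter([(
        "a",
        Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
    )])?;

    // File::create (not File::open) so the file exists and is writable.
    let output = File::create(&path)?;
    let mut writer = ArrowWriter::try_new(output, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?; // flushes the footer; the file is now valid Parquet

    println!("wrote {} bytes", std::fs::metadata(&path)?.len());
    Ok(())
}

Keeping the path next to the handle, as the diff does with (path, output), is what lets the later map call local_unpartitioned_file(path): a plain std::fs::File no longer knows its own location the way NamedTempFile did.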
Expand Down Expand Up @@ -1263,8 +1264,7 @@ mod tests {
// https://github.com/apache/arrow-datafusion/issues/5779
let fields1 = ["description", "code", "year"];
let fields2 = ["year", "description", "code"];
let batch1 =
RecordBatch::try_from_iter(func(fields1)).unwrap();
let batch1 = RecordBatch::try_from_iter(func(fields1)).unwrap();
let batch2 = RecordBatch::try_from_iter(func(fields2)).unwrap();

let store = Arc::new(RequestCountingObjectStore::new(Arc::new(
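The test in the last hunk builds two batches whose columns differ only in order; the linked issue (apache/arrow-datafusion#5779) concerns merging schemas in exactly that situation. A small illustration of what try_from_iter produces there, with a hypothetical make_columns helper standing in for the test's func:

use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use arrow::record_batch::RecordBatch;

// Hypothetical stand-in for the test's `func`: one Int32 column per name,
// in exactly the order the names are given.
fn make_columns(fields: [&str; 3]) -> Vec<(String, ArrayRef)> {
    fields
        .iter()
        .map(|&name| (name.to_string(), Arc::new(Int32Array::from(vec![1])) as ArrayRef))
        .collect()
}

fn main() {
    let batch1 = RecordBatch::try_from_iter(make_columns(["description", "code", "year"])).unwrap();
    let batch2 = RecordBatch::try_from_iter(make_columns(["year", "description", "code"])).unwrap();

    // Same columns, different field order: the schemas are not equal,
    // which is the case the schema-merge path has to reconcile.
    assert_ne!(batch1.schema(), batch2.schema());
}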
