From 81ab424913daad775ce63be62f7b57b145313267 Mon Sep 17 00:00:00 2001
From: Jorge Leitao
Date: Wed, 29 Jun 2022 08:57:55 -0700
Subject: [PATCH] Improved read performance (#1124)

---
 src/error.rs                     |  6 ++++++
 src/io/parquet/read/row_group.rs | 19 +++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/error.rs b/src/error.rs
index 3bf4d8b6501..52a8cd9062b 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -62,6 +62,12 @@ impl From for Error {
     }
 }
 
+impl From<std::collections::TryReserveError> for Error {
+    fn from(_: std::collections::TryReserveError) -> Error {
+        Error::Overflow
+    }
+}
+
 impl Display for Error {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
diff --git a/src/io/parquet/read/row_group.rs b/src/io/parquet/read/row_group.rs
index 5d9198e6f45..deefaea3644 100644
--- a/src/io/parquet/read/row_group.rs
+++ b/src/io/parquet/read/row_group.rs
@@ -120,10 +120,15 @@ fn _read_single_column<'a, R>(
 where
     R: Read + Seek,
 {
-    let (start, len) = meta.byte_range();
+    let (start, length) = meta.byte_range();
     reader.seek(std::io::SeekFrom::Start(start))?;
-    let mut chunk = vec![0; len as usize];
-    reader.read_exact(&mut chunk)?;
+
+    let mut chunk = vec![];
+    chunk.try_reserve(length as usize)?;
+    reader
+        .by_ref()
+        .take(length as u64)
+        .read_to_end(&mut chunk)?;
     Ok((meta, chunk))
 }
 
@@ -136,10 +141,12 @@ where
     F: Fn() -> BoxFuture<'b, std::io::Result>,
 {
     let mut reader = factory().await?;
-    let (start, len) = meta.byte_range();
+    let (start, length) = meta.byte_range();
     reader.seek(std::io::SeekFrom::Start(start)).await?;
-    let mut chunk = vec![0; len as usize];
-    reader.read_exact(&mut chunk).await?;
+
+    let mut chunk = vec![];
+    chunk.try_reserve(length as usize)?;
+    reader.take(length as u64).read_to_end(&mut chunk).await?;
     Result::Ok((meta, chunk))
 }
 