From 39a872f1866a6ccf180c369c3e81581675a3ce1d Mon Sep 17 00:00:00 2001 From: Dan Harris Date: Sat, 16 Jul 2022 09:40:10 -0400 Subject: [PATCH] PR comments --- arrow-flight/src/arrow.flight.protocol.rs | 42 +++++++++++------------ arrow/Cargo.toml | 2 +- parquet/Cargo.toml | 2 +- parquet/src/data_type.rs | 7 ++-- parquet/src/encodings/rle.rs | 2 +- parquet/src/util/bit_util.rs | 9 ----- 6 files changed, 27 insertions(+), 37 deletions(-) diff --git a/arrow-flight/src/arrow.flight.protocol.rs b/arrow-flight/src/arrow.flight.protocol.rs index f56dd0dd8869..c76469b39ce7 100644 --- a/arrow-flight/src/arrow.flight.protocol.rs +++ b/arrow-flight/src/arrow.flight.protocol.rs @@ -279,9 +279,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -308,9 +308,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -389,9 +389,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -418,9 +418,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -446,9 +446,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -475,9 +475,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -501,9 +501,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 17ebde52b25a..7b3d4c64ad71 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -41,7 +41,7 @@ bench = false ahash = { version = "0.7", default-features = false } serde = { version = "1.0", default-features = false } serde_derive = { version = "1.0", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["preserve_order","std"] } +serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 697d38c180a5..64819077a744 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -59,7 +59,7 @@ brotli = { version = "3.3", default-features = false, features = [ "std" ] } flate2 = { version = "1.0", default-features = false, features = [ "rust_backend" ] } lz4 = { version = "1.23", default-features = false } zstd = { version = "0.11", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["preserve_order","std"] } +serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } arrow = { path = "../arrow", version = "18.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint"] } [package.metadata.docs.rs] diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 2364d58ee2e7..7b6fb04a74bb 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -695,8 +695,7 @@ pub(crate) mod private { fn skip(decoder: &mut PlainDecoderDetails, num_values: usize) -> Result { let bit_reader = decoder.bit_reader.as_mut().unwrap(); let num_values = std::cmp::min(num_values, decoder.num_values); - let mut buffer = vec![false; num_values]; - let values_read = bit_reader.get_batch(&mut buffer[..num_values], 1); + let values_read = bit_reader.skip(num_values, 1); decoder.num_values -= values_read; Ok(values_read) } @@ -778,7 +777,7 @@ pub(crate) mod private { #[inline] fn skip(decoder: &mut PlainDecoderDetails, num_values: usize) -> Result { let data = decoder.data.as_ref().expect("set_data should have been called"); - let num_values = std::cmp::min(num_values, decoder.num_values); + let num_values = num_values.min(decoder.num_values); let bytes_left = data.len() - decoder.start; let bytes_to_skip = std::mem::size_of::() * num_values; @@ -987,7 +986,7 @@ pub(crate) mod private { .data .as_mut() .expect("set_data should have been called"); - let num_values = std::cmp::min(num_values, decoder.num_values); + let num_values = num_values.min(decoder.num_values); for _ in 0..num_values { let len: usize = diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs index 975d5c0accb9..808c8f0d49a6 100644 --- a/parquet/src/encodings/rle.rs +++ b/parquet/src/encodings/rle.rs @@ -751,7 +751,7 @@ mod tests { "eee", "fff", "ddd", "eee", "fff", "eee", "fff", "fff", ]; - let skipped = decoder.skip(4).expect("skipping two values"); + let skipped = decoder.skip(4).expect("skipping four values"); assert_eq!(skipped, 4); let remainder = decoder.get_batch_with_dict::<&str>( dict.as_slice(), diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index a78ee8d16989..29269c4ad7e2 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -642,15 +642,6 @@ impl BitReader { let mut values_skipped = 0; - if num_bits > 32 { - // No fast path - read values individually - while values_skipped < num_values { - self.skip_value(num_bits); - values_skipped += 1; - } - return num_values; - } - // First align bit offset to byte offset if self.bit_offset != 0 { while values_skipped < num_values && self.bit_offset != 0 {