From 094dff13b27ba353262b23dd4bd84f21555ede76 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 26 Apr 2023 13:16:56 +0100 Subject: [PATCH 1/2] Return BooleanBuffer from BooleanBufferBuilder --- arrow-arith/src/arithmetic.rs | 7 +-- arrow-array/src/array/primitive_array.rs | 6 +-- .../src/builder/boolean_buffer_builder.rs | 43 +++++++++++-------- arrow-array/src/builder/boolean_builder.rs | 2 +- .../src/builder/null_buffer_builder.rs | 3 +- arrow-integration-test/Cargo.toml | 2 +- arrow-json/src/reader/list_array.rs | 4 +- arrow-json/src/reader/map_array.rs | 4 +- arrow-json/src/reader/struct_array.rs | 4 +- arrow-row/src/dictionary.rs | 4 +- arrow-select/src/filter.rs | 4 +- arrow-select/src/interleave.rs | 2 +- arrow-select/src/nullif.rs | 2 +- arrow-string/src/like.rs | 2 +- arrow-string/src/regexp.rs | 4 +- parquet/src/arrow/array_reader/list_array.rs | 2 +- .../src/arrow/array_reader/primitive_array.rs | 2 +- .../src/arrow/array_reader/struct_array.rs | 4 +- .../arrow/record_reader/definition_levels.rs | 2 +- 19 files changed, 48 insertions(+), 55 deletions(-) diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index 2b8a2f3b7db2..3803f69e26f9 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -3575,12 +3575,7 @@ mod tests { null_buffer_builder.resize(13); assert_eq!(null_buffer_builder.len(), 13); - let null_buffer = null_buffer_builder.finish(); - - // `count_set_bits_offset` takes len in bits as parameter. - assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0); - - let nulls = BooleanBuffer::new(null_buffer, 0, 13); + let nulls = null_buffer_builder.finish(); assert_eq!(nulls.count_set_bits(), 0); let nulls = NullBuffer::new(nulls); assert_eq!(nulls.null_count(), 13); diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 9fb78eb1459d..8c8562b5be38 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -25,9 +25,7 @@ use crate::timezone::Tz; use crate::trusted_len::trusted_len_unzip; use crate::types::*; use crate::{Array, ArrayAccessor, ArrayRef}; -use arrow_buffer::{ - i256, ArrowNativeType, BooleanBuffer, Buffer, NullBuffer, ScalarBuffer, -}; +use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer}; use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; @@ -642,7 +640,7 @@ impl PrimitiveArray { Ok::<_, ()>(()) }); - let nulls = BooleanBuffer::new(null_builder.finish(), 0, len); + let nulls = null_builder.finish(); let values = buffer.finish().into(); let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) }; PrimitiveArray::new(values, Some(nulls)) diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs index ac2a96feade0..f721504d08aa 100644 --- a/arrow-array/src/builder/boolean_buffer_builder.rs +++ b/arrow-array/src/builder/boolean_buffer_builder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow_buffer::{bit_util, Buffer, MutableBuffer}; +use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer}; use arrow_data::bit_mask; use std::ops::Range; @@ -214,12 +214,12 @@ impl BooleanBufferBuilder { self.buffer.as_slice_mut() } - /// Creates a [`Buffer`] + /// Creates a [`BooleanBuffer`] #[inline] - pub fn finish(&mut self) -> Buffer { + pub fn finish(&mut self) -> BooleanBuffer { let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); - self.len = 0; - buf.into() + let len = std::mem::replace(&mut self.len, 0); + BooleanBuffer::new(buf.into(), 0, len) } } @@ -230,6 +230,13 @@ impl From for Buffer { } } +impl From for BooleanBuffer { + #[inline] + fn from(builder: BooleanBufferBuilder) -> Self { + BooleanBuffer::new(builder.buffer.into(), 0, builder.len) + } +} + #[cfg(test)] mod tests { use super::*; @@ -244,7 +251,7 @@ mod tests { assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); - assert_eq!(1, buffer.len()); + assert_eq!(4, buffer.len()); // Overallocate capacity let mut b = BooleanBufferBuilder::new(8); @@ -252,7 +259,7 @@ mod tests { assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); - assert_eq!(1, buffer.len()); + assert_eq!(4, buffer.len()); } #[test] @@ -264,7 +271,7 @@ mod tests { buffer.append(true); buffer.set_bit(0, false); assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]); + assert_eq!(buffer.finish().values(), &[0b1010_u8]); } #[test] @@ -276,7 +283,7 @@ mod tests { buffer.append(true); buffer.set_bit(3, false); assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]); + assert_eq!(buffer.finish().values(), &[0b0011_u8]); } #[test] @@ -288,7 +295,7 @@ mod tests { buffer.append(true); buffer.set_bit(1, false); assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]); + assert_eq!(buffer.finish().values(), &[0b1001_u8]); } #[test] @@ -302,7 +309,7 @@ mod tests { buffer.set_bit(1, false); buffer.set_bit(2, false); assert_eq!(buffer.len(), 5); - assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]); + assert_eq!(buffer.finish().values(), &[0b10001_u8]); } #[test] @@ -313,7 +320,7 @@ mod tests { buffer.set_bit(3, false); buffer.set_bit(9, false); assert_eq!(buffer.len(), 10); - assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]); + assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]); } #[test] @@ -329,7 +336,7 @@ mod tests { buffer.set_bit(14, true); buffer.set_bit(13, false); assert_eq!(buffer.len(), 15); - assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]); + assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]); } #[test] @@ -394,7 +401,7 @@ mod tests { let start = a.min(b); let end = a.max(b); - buffer.append_packed_range(start..end, compacted_src.as_slice()); + buffer.append_packed_range(start..end, compacted_src.values()); all_bools.extend_from_slice(&src[start..end]); } @@ -430,14 +437,14 @@ mod tests { let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2); builder.advance(2); let finished = builder.finish(); - assert_eq!(finished.as_slice(), &[0b00000011]); + assert_eq!(finished.values(), &[0b00000011]); let mut builder = BooleanBufferBuilder::new(10); builder.append_n(5, true); builder.resize(3); builder.advance(2); let finished = builder.finish(); - assert_eq!(finished.as_slice(), &[0b00000111]); + assert_eq!(finished.values(), &[0b00000111]); let mut builder = BooleanBufferBuilder::new(10); builder.append_n(16, true); @@ -478,7 +485,7 @@ mod tests { } let buf2 = builder.finish(); - assert_eq!(buf.len(), buf2.len()); - assert_eq!(buf.as_slice(), buf2.as_slice()); + assert_eq!(buf.len(), buf2.inner().len()); + assert_eq!(buf.as_slice(), buf2.values()); } } diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index bc3b62f99234..c7974967a700 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -149,7 +149,7 @@ impl BooleanBuilder { let null_bit_buffer = self.null_buffer_builder.finish(); let builder = ArrayData::builder(DataType::Boolean) .len(len) - .add_buffer(self.values_builder.finish()) + .add_buffer(self.values_builder.finish().into_inner()) .null_bit_buffer(null_bit_buffer); let array_data = unsafe { builder.build_unchecked() }; diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs index 0061f70c7ed4..f37ce3a747ff 100644 --- a/arrow-array/src/builder/null_buffer_builder.rs +++ b/arrow-array/src/builder/null_buffer_builder.rs @@ -129,8 +129,7 @@ impl NullBufferBuilder { /// Builds the null buffer and resets the builder. /// Returns `None` if the builder only contains `true`s. pub fn finish(&mut self) -> Option { - let buf = self.bitmap_builder.as_mut().map(|b| b.finish()); - self.bitmap_builder = None; + let buf = self.bitmap_builder.take().map(Into::into); self.len = 0; buf } diff --git a/arrow-integration-test/Cargo.toml b/arrow-integration-test/Cargo.toml index 6ede476eb569..8afbfacff7c3 100644 --- a/arrow-integration-test/Cargo.toml +++ b/arrow-integration-test/Cargo.toml @@ -35,7 +35,7 @@ bench = false [dependencies] arrow = { workspace = true } -arrow-buffer = { workspace = true, path = "../arrow-buffer" } +arrow-buffer = { workspace = true } hex = { version = "0.4", default-features = false, features = ["std"] } serde = { version = "1.0", default-features = false, features = ["rc", "derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs index aa3538bd5349..b9db6bceb4b3 100644 --- a/arrow-json/src/reader/list_array.rs +++ b/arrow-json/src/reader/list_array.rs @@ -99,9 +99,7 @@ impl ArrayDecoder for ListArrayDecoder { } let child_data = self.decoder.decode(tape, &child_pos)?; - let nulls = nulls - .as_mut() - .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len()))); + let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish())); let data = ArrayDataBuilder::new(self.data_type.clone()) .len(pos.len()) diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs index 5e800a0d62dd..15814b1b6684 100644 --- a/arrow-json/src/reader/map_array.rs +++ b/arrow-json/src/reader/map_array.rs @@ -139,9 +139,7 @@ impl ArrayDecoder for MapArrayDecoder { // Valid by construction let struct_data = unsafe { struct_data.build_unchecked() }; - let nulls = nulls - .as_mut() - .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len()))); + let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish())); let builder = ArrayDataBuilder::new(self.data_type.clone()) .len(pos.len()) diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs index 707b56d50eef..2a25eb39facd 100644 --- a/arrow-json/src/reader/struct_array.rs +++ b/arrow-json/src/reader/struct_array.rs @@ -113,9 +113,7 @@ impl ArrayDecoder for StructArrayDecoder { }) .collect::, ArrowError>>()?; - let nulls = nulls - .as_mut() - .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len()))); + let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish())); for (c, f) in child_data.iter().zip(fields) { // Sanity check diff --git a/arrow-row/src/dictionary.rs b/arrow-row/src/dictionary.rs index 273b7439d0d1..d790d951ee3a 100644 --- a/arrow-row/src/dictionary.rs +++ b/arrow-row/src/dictionary.rs @@ -202,7 +202,7 @@ pub unsafe fn decode_dictionary( let builder = ArrayDataBuilder::new(data_type) .len(len) - .null_bit_buffer(Some(null_builder.finish())) + .null_bit_buffer(Some(null_builder.into())) .null_count(null_count) .add_buffer(keys.finish()) .add_child_data(child); @@ -250,7 +250,7 @@ fn decode_bool(values: &[&[u8]]) -> ArrayData { let builder = ArrayDataBuilder::new(DataType::Boolean) .len(values.len()) - .add_buffer(builder.finish()); + .add_buffer(builder.into()); // SAFETY: Buffers correct length unsafe { builder.build_unchecked() } diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index 06f0833561d6..c89491944a21 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -433,7 +433,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer { for (start, end) in SlicesIterator::new(&predicate.filter) { builder.append_packed_range(start + offset..end + offset, src) } - builder.finish() + builder.into() } IterationStrategy::Slices(slices) => { let mut builder = @@ -441,7 +441,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer { for (start, end) in slices { builder.append_packed_range(*start + offset..*end + offset, src) } - builder.finish() + builder.into() } IterationStrategy::All | IterationStrategy::None => unreachable!(), } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 491395d1cc1a..c0d2026808af 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -122,7 +122,7 @@ impl<'a, T: Array + 'static> Interleave<'a, T> { null_count += !v as usize; builder.append(v) } - builder.finish() + builder.into() }); Self { diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index aaa3423d69e5..3d9148016af0 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -105,7 +105,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result( let data = unsafe { ArrayDataBuilder::new(DataType::Boolean) .len(array.len()) - .buffers(vec![result.finish()]) + .buffers(vec![result.into()]) .nulls(nulls) .build_unchecked() }; @@ -136,7 +136,7 @@ pub fn regexp_is_match_utf8_scalar( } } - let buffer = result.finish(); + let buffer = result.into(); let data = unsafe { ArrayData::new_unchecked( DataType::Boolean, diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index a6b354f902df..8160f222b484 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -222,7 +222,7 @@ impl ArrayReader for ListArrayReader { if let Some(mut builder) = validity { assert_eq!(builder.len(), list_offsets.len() - 1); - data_builder = data_builder.null_bit_buffer(Some(builder.finish())) + data_builder = data_builder.null_bit_buffer(Some(builder.into())) } let list_data = unsafe { data_builder.build_unchecked() }; diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 012cad5c4c69..772026960a3f 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -147,7 +147,7 @@ where for e in record_data.as_slice() { boolean_buffer.append(*e > 0); } - boolean_buffer.finish() + boolean_buffer.into() } PhysicalType::INT96 => { // SAFETY - record_data is an aligned buffer of Int96 diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs index 11e019f29a59..66bd7a6769b1 100644 --- a/parquet/src/arrow/array_reader/struct_array.rs +++ b/parquet/src/arrow/array_reader/struct_array.rs @@ -17,7 +17,7 @@ use crate::arrow::array_reader::ArrayReader; use crate::errors::{ParquetError, Result}; -use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array}; +use arrow_array::{builder::BooleanBufferBuilder, Array, ArrayRef, StructArray}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::DataType as ArrowType; use std::any::Any; @@ -170,7 +170,7 @@ impl ArrayReader for StructArrayReader { } array_data_builder = - array_data_builder.null_bit_buffer(Some(bitmap_builder.finish())); + array_data_builder.null_bit_buffer(Some(bitmap_builder.into())); } let array_data = unsafe { array_data_builder.build_unchecked() }; diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs index 7c27a365fc28..272716caf664 100644 --- a/parquet/src/arrow/record_reader/definition_levels.rs +++ b/parquet/src/arrow/record_reader/definition_levels.rs @@ -123,7 +123,7 @@ impl DefinitionLevelBuffer { // Swap into self self.len = new_builder.len(); - std::mem::replace(old_builder, new_builder).finish() + std::mem::replace(old_builder, new_builder).into() } pub fn nulls(&self) -> &BooleanBufferBuilder { From 8d13c393723922417df62bbcb0524ed3e661596b Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 26 Apr 2023 13:31:13 +0100 Subject: [PATCH 2/2] Clippy --- arrow-arith/src/arithmetic.rs | 2 +- arrow-json/src/reader/list_array.rs | 2 +- arrow-json/src/reader/map_array.rs | 2 +- arrow-json/src/reader/struct_array.rs | 2 +- parquet/src/arrow/array_reader/list_array.rs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index 3803f69e26f9..7f5a081900df 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -1965,7 +1965,7 @@ mod tests { BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder, }; use arrow_array::temporal_conversions::SECONDS_IN_DAY; - use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; + use arrow_buffer::buffer::NullBuffer; use arrow_buffer::i256; use arrow_data::ArrayDataBuilder; use chrono::NaiveDate; diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs index b9db6bceb4b3..ad27eb516fab 100644 --- a/arrow-json/src/reader/list_array.rs +++ b/arrow-json/src/reader/list_array.rs @@ -19,7 +19,7 @@ use crate::reader::tape::{Tape, TapeElement}; use crate::reader::{make_decoder, ArrayDecoder}; use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder}; use arrow_array::OffsetSizeTrait; -use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; +use arrow_buffer::buffer::NullBuffer; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; use std::marker::PhantomData; diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs index 15814b1b6684..2d6fde34d433 100644 --- a/arrow-json/src/reader/map_array.rs +++ b/arrow-json/src/reader/map_array.rs @@ -18,7 +18,7 @@ use crate::reader::tape::{Tape, TapeElement}; use crate::reader::{make_decoder, ArrayDecoder}; use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder}; -use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; +use arrow_buffer::buffer::NullBuffer; use arrow_buffer::ArrowNativeType; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs index 2a25eb39facd..3d24a927d85c 100644 --- a/arrow-json/src/reader/struct_array.rs +++ b/arrow-json/src/reader/struct_array.rs @@ -18,7 +18,7 @@ use crate::reader::tape::{Tape, TapeElement}; use crate::reader::{make_decoder, ArrayDecoder}; use arrow_array::builder::BooleanBufferBuilder; -use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; +use arrow_buffer::buffer::NullBuffer; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType, Fields}; diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index 8160f222b484..932034417c81 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -220,7 +220,7 @@ impl ArrayReader for ListArrayReader { .add_buffer(value_offsets) .add_child_data(child_data); - if let Some(mut builder) = validity { + if let Some(builder) = validity { assert_eq!(builder.len(), list_offsets.len() - 1); data_builder = data_builder.null_bit_buffer(Some(builder.into())) }