Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return BooleanBuffer from BooleanBufferBuilder #4140

Merged
merged 2 commits into from
Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions arrow-arith/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,7 @@ mod tests {
BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder,
};
use arrow_array::temporal_conversions::SECONDS_IN_DAY;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::i256;
use arrow_data::ArrayDataBuilder;
use chrono::NaiveDate;
Expand Down Expand Up @@ -3575,12 +3575,7 @@ mod tests {
null_buffer_builder.resize(13);
assert_eq!(null_buffer_builder.len(), 13);

let null_buffer = null_buffer_builder.finish();

// `count_set_bits_offset` takes len in bits as parameter.
assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0);

let nulls = BooleanBuffer::new(null_buffer, 0, 13);
let nulls = null_buffer_builder.finish();
assert_eq!(nulls.count_set_bits(), 0);
let nulls = NullBuffer::new(nulls);
assert_eq!(nulls.null_count(), 13);
Expand Down
6 changes: 2 additions & 4 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::types::*;
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::{
i256, ArrowNativeType, BooleanBuffer, Buffer, NullBuffer, ScalarBuffer,
};
use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
Expand Down Expand Up @@ -642,7 +640,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
Ok::<_, ()>(())
});

let nulls = BooleanBuffer::new(null_builder.finish(), 0, len);
let nulls = null_builder.finish();
let values = buffer.finish().into();
let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) };
PrimitiveArray::new(values, Some(nulls))
Expand Down
43 changes: 25 additions & 18 deletions arrow-array/src/builder/boolean_buffer_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;

Expand Down Expand Up @@ -214,12 +214,12 @@ impl BooleanBufferBuilder {
self.buffer.as_slice_mut()
}

/// Creates a [`Buffer`]
/// Creates a [`BooleanBuffer`]
#[inline]
pub fn finish(&mut self) -> Buffer {
pub fn finish(&mut self) -> BooleanBuffer {
Copy link
Contributor Author

@tustvold tustvold Apr 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I felt it was better to just break the API, than leaving the API in a somewhat incoherent state, where a type called BooleanBufferBuilder doesn't actually build BooleanBuffer

let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
self.len = 0;
buf.into()
let len = std::mem::replace(&mut self.len, 0);
BooleanBuffer::new(buf.into(), 0, len)
}
}

Expand All @@ -230,6 +230,13 @@ impl From<BooleanBufferBuilder> for Buffer {
}
}

impl From<BooleanBufferBuilder> for BooleanBuffer {
#[inline]
fn from(builder: BooleanBufferBuilder) -> Self {
BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -244,15 +251,15 @@ mod tests {
assert_eq!(4, b.len());
assert_eq!(512, b.capacity());
let buffer = b.finish();
assert_eq!(1, buffer.len());
assert_eq!(4, buffer.len());

// Overallocate capacity
let mut b = BooleanBufferBuilder::new(8);
b.append_slice(&[false, true, false, true]);
assert_eq!(4, b.len());
assert_eq!(512, b.capacity());
let buffer = b.finish();
assert_eq!(1, buffer.len());
assert_eq!(4, buffer.len());
}

#[test]
Expand All @@ -264,7 +271,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(0, false);
assert_eq!(buffer.len(), 4);
assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]);
assert_eq!(buffer.finish().values(), &[0b1010_u8]);
}

#[test]
Expand All @@ -276,7 +283,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(3, false);
assert_eq!(buffer.len(), 4);
assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]);
assert_eq!(buffer.finish().values(), &[0b0011_u8]);
}

#[test]
Expand All @@ -288,7 +295,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(1, false);
assert_eq!(buffer.len(), 4);
assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]);
assert_eq!(buffer.finish().values(), &[0b1001_u8]);
}

#[test]
Expand All @@ -302,7 +309,7 @@ mod tests {
buffer.set_bit(1, false);
buffer.set_bit(2, false);
assert_eq!(buffer.len(), 5);
assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]);
assert_eq!(buffer.finish().values(), &[0b10001_u8]);
}

#[test]
Expand All @@ -313,7 +320,7 @@ mod tests {
buffer.set_bit(3, false);
buffer.set_bit(9, false);
assert_eq!(buffer.len(), 10);
assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]);
assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
}

#[test]
Expand All @@ -329,7 +336,7 @@ mod tests {
buffer.set_bit(14, true);
buffer.set_bit(13, false);
assert_eq!(buffer.len(), 15);
assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]);
assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
}

#[test]
Expand Down Expand Up @@ -394,7 +401,7 @@ mod tests {
let start = a.min(b);
let end = a.max(b);

buffer.append_packed_range(start..end, compacted_src.as_slice());
buffer.append_packed_range(start..end, compacted_src.values());
all_bools.extend_from_slice(&src[start..end]);
}

Expand Down Expand Up @@ -430,14 +437,14 @@ mod tests {
let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
builder.advance(2);
let finished = builder.finish();
assert_eq!(finished.as_slice(), &[0b00000011]);
assert_eq!(finished.values(), &[0b00000011]);

let mut builder = BooleanBufferBuilder::new(10);
builder.append_n(5, true);
builder.resize(3);
builder.advance(2);
let finished = builder.finish();
assert_eq!(finished.as_slice(), &[0b00000111]);
assert_eq!(finished.values(), &[0b00000111]);

let mut builder = BooleanBufferBuilder::new(10);
builder.append_n(16, true);
Expand Down Expand Up @@ -478,7 +485,7 @@ mod tests {
}
let buf2 = builder.finish();

assert_eq!(buf.len(), buf2.len());
assert_eq!(buf.as_slice(), buf2.as_slice());
assert_eq!(buf.len(), buf2.inner().len());
assert_eq!(buf.as_slice(), buf2.values());
}
}
2 changes: 1 addition & 1 deletion arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ impl BooleanBuilder {
let null_bit_buffer = self.null_buffer_builder.finish();
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
.add_buffer(self.values_builder.finish())
.add_buffer(self.values_builder.finish().into_inner())
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { builder.build_unchecked() };
Expand Down
3 changes: 1 addition & 2 deletions arrow-array/src/builder/null_buffer_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ impl NullBufferBuilder {
/// Builds the null buffer and resets the builder.
/// Returns `None` if the builder only contains `true`s.
pub fn finish(&mut self) -> Option<Buffer> {
let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
self.bitmap_builder = None;
let buf = self.bitmap_builder.take().map(Into::into);
self.len = 0;
buf
}
Expand Down
2 changes: 1 addition & 1 deletion arrow-integration-test/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ bench = false

[dependencies]
arrow = { workspace = true }
arrow-buffer = { workspace = true, path = "../arrow-buffer" }
arrow-buffer = { workspace = true }
hex = { version = "0.4", default-features = false, features = ["std"] }
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
Expand Down
6 changes: 2 additions & 4 deletions arrow-json/src/reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_array::OffsetSizeTrait;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
use std::marker::PhantomData;
Expand Down Expand Up @@ -99,9 +99,7 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
}

let child_data = self.decoder.decode(tape, &child_pos)?;
let nulls = nulls
.as_mut()
.map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));

let data = ArrayDataBuilder::new(self.data_type.clone())
.len(pos.len())
Expand Down
6 changes: 2 additions & 4 deletions arrow-json/src/reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
Expand Down Expand Up @@ -139,9 +139,7 @@ impl ArrayDecoder for MapArrayDecoder {
// Valid by construction
let struct_data = unsafe { struct_data.build_unchecked() };

let nulls = nulls
.as_mut()
.map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));

let builder = ArrayDataBuilder::new(self.data_type.clone())
.len(pos.len())
Expand Down
6 changes: 2 additions & 4 deletions arrow-json/src/reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::BooleanBufferBuilder;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Fields};

Expand Down Expand Up @@ -113,9 +113,7 @@ impl ArrayDecoder for StructArrayDecoder {
})
.collect::<Result<Vec<_>, ArrowError>>()?;

let nulls = nulls
.as_mut()
.map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));

for (c, f) in child_data.iter().zip(fields) {
// Sanity check
Expand Down
4 changes: 2 additions & 2 deletions arrow-row/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ pub unsafe fn decode_dictionary<K: ArrowDictionaryKeyType>(

let builder = ArrayDataBuilder::new(data_type)
.len(len)
.null_bit_buffer(Some(null_builder.finish()))
.null_bit_buffer(Some(null_builder.into()))
.null_count(null_count)
.add_buffer(keys.finish())
.add_child_data(child);
Expand Down Expand Up @@ -250,7 +250,7 @@ fn decode_bool(values: &[&[u8]]) -> ArrayData {

let builder = ArrayDataBuilder::new(DataType::Boolean)
.len(values.len())
.add_buffer(builder.finish());
.add_buffer(builder.into());

// SAFETY: Buffers correct length
unsafe { builder.build_unchecked() }
Expand Down
4 changes: 2 additions & 2 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,15 +433,15 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
for (start, end) in SlicesIterator::new(&predicate.filter) {
builder.append_packed_range(start + offset..end + offset, src)
}
builder.finish()
builder.into()
}
IterationStrategy::Slices(slices) => {
let mut builder =
BooleanBufferBuilder::new(bit_util::ceil(predicate.count, 8));
for (start, end) in slices {
builder.append_packed_range(*start + offset..*end + offset, src)
}
builder.finish()
builder.into()
}
IterationStrategy::All | IterationStrategy::None => unreachable!(),
}
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/interleave.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ impl<'a, T: Array + 'static> Interleave<'a, T> {
null_count += !v as usize;
builder.append(v)
}
builder.finish()
builder.into()
});

Self {
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/nullif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
// Pad with 0s up to offset
builder.resize(l_offset);
builder.append_packed_range(0..len, &combined);
builder.finish()
builder.into()
}
};

Expand Down
2 changes: 1 addition & 1 deletion arrow-string/src/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ where
ArrayDataBuilder::new(DataType::Boolean)
.len(left.len())
.nulls(nulls)
.buffers(vec![result.finish()])
.buffers(vec![result.into()])
.build_unchecked()
};
Ok(BooleanArray::from(data))
Expand Down
4 changes: 2 additions & 2 deletions arrow-string/src/regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
let data = unsafe {
ArrayDataBuilder::new(DataType::Boolean)
.len(array.len())
.buffers(vec![result.finish()])
.buffers(vec![result.into()])
.nulls(nulls)
.build_unchecked()
};
Expand Down Expand Up @@ -136,7 +136,7 @@ pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
}
}

let buffer = result.finish();
let buffer = result.into();
let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
.add_buffer(value_offsets)
.add_child_data(child_data);

if let Some(mut builder) = validity {
if let Some(builder) = validity {
assert_eq!(builder.len(), list_offsets.len() - 1);
data_builder = data_builder.null_bit_buffer(Some(builder.finish()))
data_builder = data_builder.null_bit_buffer(Some(builder.into()))
}

let list_data = unsafe { data_builder.build_unchecked() };
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ where
for e in record_data.as_slice() {
boolean_buffer.append(*e > 0);
}
boolean_buffer.finish()
boolean_buffer.into()
}
PhysicalType::INT96 => {
// SAFETY - record_data is an aligned buffer of Int96
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use crate::arrow::array_reader::ArrayReader;
use crate::errors::{ParquetError, Result};
use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array};
use arrow_array::{builder::BooleanBufferBuilder, Array, ArrayRef, StructArray};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
Expand Down Expand Up @@ -170,7 +170,7 @@ impl ArrayReader for StructArrayReader {
}

array_data_builder =
array_data_builder.null_bit_buffer(Some(bitmap_builder.finish()));
array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
}

let array_data = unsafe { array_data_builder.build_unchecked() };
Expand Down
Loading