Skip to content

Commit

Permalink
Deprecate Array::data_ref (apache#3880)
Browse files Browse the repository at this point in the history
tustvold committed Mar 23, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent de9a90b commit 71be2e7
Showing 8 changed files with 62 additions and 144 deletions.
7 changes: 4 additions & 3 deletions arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
@@ -47,13 +47,14 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}

fn from_list(v: GenericListArray<OffsetSize>) -> Self {
let v = v.into_data();
assert_eq!(
v.data_ref().child_data().len(),
v.child_data().len(),
1,
"BinaryArray can only be created from list array of u8 values \
(i.e. List<PrimitiveArray<u8>>)."
);
let child_data = &v.data_ref().child_data()[0];
let child_data = &v.child_data()[0];

assert_eq!(
child_data.child_data().len(),
@@ -75,7 +76,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.buffers()[0].clone())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
.nulls(v.nulls().cloned());

8 changes: 5 additions & 3 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
@@ -396,13 +396,15 @@ impl From<FixedSizeBinaryArray> for ArrayData {
/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
impl From<FixedSizeListArray> for FixedSizeBinaryArray {
fn from(v: FixedSizeListArray) -> Self {
let value_len = v.value_length();
let v = v.into_data();
assert_eq!(
v.data_ref().child_data().len(),
v.child_data().len(),
1,
"FixedSizeBinaryArray can only be created from list array of u8 values \
(i.e. FixedSizeList<PrimitiveArray<u8>>)."
);
let child_data = &v.data_ref().child_data()[0];
let child_data = &v.child_data()[0];

assert_eq!(
child_data.child_data().len(),
@@ -421,7 +423,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
"The child array cannot contain null values."
);

let builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len))
.len(v.len())
.offset(v.offset())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
27 changes: 17 additions & 10 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -110,6 +110,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// Returns a reference-counted pointer to the underlying data of this array.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
fn data_ref(&self) -> &ArrayData {
self.data()
}
@@ -126,6 +127,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(*array.data_type(), DataType::Int32);
/// ```
#[allow(deprecated)] // (#3880)
fn data_type(&self) -> &DataType {
self.data_ref().data_type()
}
@@ -156,6 +158,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(array.len(), 5);
/// ```
#[allow(deprecated)] // (#3880)
fn len(&self) -> usize {
self.data_ref().len()
}
@@ -171,6 +174,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(array.is_empty(), false);
/// ```
#[allow(deprecated)] // (#3880)
fn is_empty(&self) -> bool {
self.data_ref().is_empty()
}
@@ -191,6 +195,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// assert_eq!(array.offset(), 0);
/// assert_eq!(array_slice.offset(), 1);
/// ```
#[allow(deprecated)] // (#3880)
fn offset(&self) -> usize {
self.data_ref().offset()
}
@@ -250,13 +255,15 @@ pub trait Array: std::fmt::Debug + Send + Sync {

/// Returns the total number of bytes of memory pointed to by this array.
/// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
#[allow(deprecated)] // (#3880)
fn get_buffer_memory_size(&self) -> usize {
self.data_ref().get_buffer_memory_size()
}

/// Returns the total number of bytes of memory occupied physically by this array.
/// This value will always be greater than returned by `get_buffer_memory_size()` and
/// includes the overhead of the data structures that contain the pointers to the various buffers.
#[allow(deprecated)] // (#3880)
fn get_array_memory_size(&self) -> usize {
// both data.get_array_memory_size and size_of_val(self) include ArrayData fields,
// to only count additional fields of this array substract size_of(ArrayData)
@@ -286,6 +293,7 @@ impl Array for ArrayRef {
self.data().clone()
}

#[allow(deprecated)]
fn data_ref(&self) -> &ArrayData {
self.as_ref().data_ref()
}
@@ -352,6 +360,7 @@ impl<'a, T: Array> Array for &'a T {
self.data().clone()
}

#[allow(deprecated)]
fn data_ref(&self) -> &ArrayData {
T::data_ref(self)
}
@@ -997,19 +1006,17 @@ mod tests {
(0..256).map(|i| (i % values.len()) as i16),
);

let dict_data = ArrayData::builder(DataType::Dictionary(
let dict_data_type = DataType::Dictionary(
Box::new(keys.data_type().clone()),
Box::new(values.data_type().clone()),
))
.len(keys.len())
.buffers(keys.data_ref().buffers().to_vec())
.child_data(vec![ArrayData::builder(DataType::Int64)
.len(values.len())
.buffers(values.data_ref().buffers().to_vec())
);
let dict_data = keys
.into_data()
.into_builder()
.data_type(dict_data_type)
.child_data(vec![values.into_data()])
.build()
.unwrap()])
.build()
.unwrap();
.unwrap();

let empty_data = ArrayData::new_empty(&DataType::Dictionary(
Box::new(DataType::Int16),
24 changes: 8 additions & 16 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
@@ -3448,9 +3448,9 @@ where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast,
{
let data = array.data_ref();
let list = array.as_list::<OffsetSizeFrom>();
// the value data stored by the list
let value_data = data.child_data()[0].clone();
let values = list.values();

let out_dtype = match array.data_type() {
DataType::List(value_type) => {
@@ -3473,7 +3473,7 @@ where
std::mem::size_of::<OffsetSizeTo>(),
std::mem::size_of::<i32>()
);
if value_data.len() > i32::MAX as usize {
if values.len() > i32::MAX as usize {
return Err(ArrowError::ComputeError(
"LargeList too large to cast to List".into(),
));
@@ -3484,14 +3484,7 @@ where
_ => unreachable!(),
};

// Safety:
// The first buffer is the offsets and they are aligned to OffSetSizeFrom: (i64 or i32)
// Justification:
// The safe variant data.buffer::<OffsetSizeFrom> take the offset into account and we
// cannot create a list array with offsets starting at non zero.
let offsets = unsafe { data.buffers()[0].as_slice().align_to::<OffsetSizeFrom>() }.1;

let iter = offsets.iter().map(|idx| {
let iter = list.value_offsets().iter().map(|idx| {
let idx: OffsetSizeTo = NumCast::from(*idx).unwrap();
idx
});
@@ -3502,14 +3495,13 @@ where

// wrap up
let builder = ArrayData::builder(out_dtype)
.offset(array.offset())
.len(array.len())
.len(list.len())
.add_buffer(offset_buffer)
.add_child_data(value_data)
.nulls(data.nulls().cloned());
.add_child_data(values.to_data())
.nulls(list.nulls().cloned());

let array_data = unsafe { builder.build_unchecked() };
Ok(make_array(array_data))
Ok(Arc::new(GenericListArray::<OffsetSizeTo>::from(array_data)))
}

#[cfg(test)]
83 changes: 13 additions & 70 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
@@ -26,8 +26,7 @@
use arrow_array::cast::*;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
use arrow_select::take::take;
@@ -1214,8 +1213,7 @@ where
));
}

let null_bit_buffer =
combine_option_bitmap(&[left.data_ref(), right.data_ref()], len);
let nulls = NullBuffer::union(left.nulls(), right.nulls());

// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
@@ -1276,18 +1274,8 @@ where
result_remainder.copy_from_slice(remainder_mask_as_bytes);
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
len,
None,
null_bit_buffer,
0,
vec![result.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
let values = BooleanBuffer::new(result.into(), 0, len);
Ok(BooleanArray::new(values, nulls))
}

/// Helper function to perform boolean lambda function on values from an array and a scalar value using
@@ -2709,19 +2697,13 @@ where

let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();

let nulls = NullBuffer::union(left.nulls(), right.nulls());
let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
let bool_slice = bool_buf.as_slice_mut();

// if both array slots are valid, check if list contains primitive
for i in 0..left_len {
if bit_util::get_bit(not_both_null_bitmap, i) {
if nulls.as_ref().map(|n| n.is_valid(i)).unwrap_or(true) {
let list = right.value(i);
let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();

@@ -2734,18 +2716,8 @@ where
}
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
left.len(),
None,
None,
0,
vec![bool_buf.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
let values = BooleanBuffer::new(bool_buf.into(), 0, left_len);
Ok(BooleanArray::new(values, None))
}

/// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`]
@@ -2766,24 +2738,15 @@ where

let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();

let nulls = NullBuffer::union(left.nulls(), right.nulls());
let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
let bool_slice = &mut bool_buf;

for i in 0..left_len {
// contains(null, null) = false
if bit_util::get_bit(not_both_null_bitmap, i) {
if nulls.as_ref().map(|n| n.is_valid(i)).unwrap_or(true) {
let list = right.value(i);
let list = list
.as_any()
.downcast_ref::<GenericStringArray<OffsetSize>>()
.unwrap();
let list = list.as_string::<OffsetSize>();

for j in 0..list.len() {
if list.is_valid(j) && (left.value(i) == list.value(j)) {
@@ -2793,28 +2756,8 @@ where
}
}
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
left.len(),
None,
None,
0,
vec![bool_buf.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
}

// create a buffer and fill it with valid bits
#[inline]
fn new_all_set_buffer(len: usize) -> Buffer {
let buffer = MutableBuffer::new(len);
let buffer = buffer.with_bitset(len, true);

buffer.into()
let values = BooleanBuffer::new(bool_buf.into(), 0, left_len);
Ok(BooleanArray::new(values, None))
}

// disable wrapping inside literal vectors used for test data and assertions
26 changes: 10 additions & 16 deletions arrow-select/src/take.rs
Original file line number Diff line number Diff line change
@@ -735,13 +735,13 @@ where
IndexType: ArrowPrimitiveType,
IndexType::Native: ToPrimitive,
{
let data_ref = values.data_ref();
let nulls = values.nulls();
let array_iter = indices
.values()
.iter()
.map(|idx| {
let idx = maybe_usize::<IndexType::Native>(*idx)?;
if data_ref.is_valid(idx) {
if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) {
Ok(Some(values.value(idx)))
} else {
Ok(None)
@@ -768,20 +768,14 @@ where
I::Native: ToPrimitive,
{
let new_keys = take_primitive::<T, I>(values.keys(), indices)?;
let new_keys_data = new_keys.data_ref();

let data = unsafe {
ArrayData::new_unchecked(
values.data_type().clone(),
new_keys.len(),
Some(new_keys_data.null_count()),
new_keys_data.nulls().map(|b| b.inner().sliced()),
0,
new_keys_data.buffers().to_vec(),
values.data().child_data().to_vec(),
)
};

let builder = new_keys
.into_data()
.into_builder()
.data_type(values.data_type().clone())
.child_data(vec![values.values().to_data()]);

// Safety: Indices were valid before
let data = unsafe { builder.build_unchecked() };
Ok(DictionaryArray::<T>::from(data))
}

2 changes: 1 addition & 1 deletion arrow-select/src/window.rs
Original file line number Diff line number Diff line change
@@ -55,7 +55,7 @@ use num::abs;
pub fn shift(array: &dyn Array, offset: i64) -> Result<ArrayRef, ArrowError> {
let value_len = array.len() as i64;
if offset == 0 {
Ok(make_array(array.data_ref().clone()))
Ok(make_array(array.to_data()))
} else if offset == i64::MIN || abs(offset) >= value_len {
Ok(new_null_array(array.data_type(), array.len()))
} else {
29 changes: 4 additions & 25 deletions arrow/tests/array_validation.rs
Original file line number Diff line number Diff line change
@@ -16,9 +16,8 @@
// under the License.

use arrow::array::{
make_array, Array, BooleanBuilder, Decimal128Builder, FixedSizeListBuilder,
Int32Array, Int32Builder, Int64Array, StringArray, StructBuilder, UInt64Array,
UInt8Builder,
make_array, Array, BooleanBuilder, Decimal128Builder, Int32Array, Int32Builder,
Int64Array, StringArray, StructBuilder, UInt64Array,
};
use arrow_array::Decimal128Array;
use arrow_buffer::{ArrowNativeType, Buffer};
@@ -996,28 +995,8 @@ fn test_string_data_from_foreign() {

#[test]
fn test_decimal_full_validation() {
let values_builder = UInt8Builder::with_capacity(10);
let byte_width = 16;
let mut fixed_size_builder = FixedSizeListBuilder::new(values_builder, byte_width);
let value_as_bytes = 123456_i128.to_le_bytes();
fixed_size_builder
.values()
.append_slice(value_as_bytes.as_slice());
fixed_size_builder.append(true);
let fixed_size_array = fixed_size_builder.finish();

// Build ArrayData for Decimal
let builder = ArrayData::builder(DataType::Decimal128(5, 3))
.len(fixed_size_array.len())
.add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone());
let array_data = unsafe { builder.build_unchecked() };
array_data.validate_full().unwrap();

let array = Decimal128Array::from(array_data);
let error = array
.validate_decimal_precision(array.precision())
.unwrap_err();

let array = Decimal128Array::from(vec![123456_i128]);
let error = array.validate_decimal_precision(5).unwrap_err();
assert_eq!(
"Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999",
error.to_string()

0 comments on commit 71be2e7

Please sign in to comment.