Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a function to get memory size of array slice #3501

Merged
merged 4 commits into from
Jan 10, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,62 @@ impl ArrayData {
size
}

/// Returns the total number of the bytes of memory occupied by the buffers by this slice of [ArrayData]
pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
askoa marked this conversation as resolved.
Show resolved Hide resolved
let mut result: usize = 0;
let layout = layout(&self.data_type);

for spec in layout.buffers.iter() {
match spec {
BufferSpec::FixedWidth { byte_width } => {
let buffer_size =
self.len.checked_mul(*byte_width).ok_or_else(|| {
ArrowError::ComputeError(
"Integer overflow computing buffer size".to_string(),
)
})?;
result += buffer_size;
}
BufferSpec::VariableWidth => {
let buffer_len: usize;
match self.data_type {
DataType::Utf8 | DataType::Binary => {
let offsets = self.typed_offsets::<i32>()?;
buffer_len = (offsets[self.len] - offsets[0] ) as usize;
}
DataType::LargeUtf8 | DataType::LargeBinary => {
let offsets = self.typed_offsets::<i64>()?;
buffer_len = (offsets[self.len] - offsets[0]) as usize;
}
_ => {
return Err(ArrowError::NotYetImplemented(format!(
"Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
self.data_type
)))
}
};
result += buffer_len;
}
BufferSpec::BitMap => {
let buffer_size = bit_util::ceil(self.len, 8);
result += buffer_size;
}
BufferSpec::AlwaysNull => {
// Nothing to do
}
}
}

if self.null_bitmap().is_some() {
result += bit_util::ceil(self.len, 8);
}

for child in &self.child_data {
result += child.get_slice_memory_size()?;
}
Ok(result)
}

/// Returns the total number of bytes of memory occupied physically by this [ArrayData].
pub fn get_array_memory_size(&self) -> usize {
let mut size = mem::size_of_val(self);
Expand Down Expand Up @@ -1838,6 +1894,42 @@ mod tests {
assert!(!string_data_slice.ptr_eq(&string_data))
}

#[test]
fn test_slice_memory_size() {
    // Fixed-width case: an Int32 array of 16 values with a validity bitmap.
    let mut validity: [u8; 2] = [0; 2];
    for &bit in &[0, 3, 10] {
        bit_util::set_bit(&mut validity, bit);
    }
    let int_data = ArrayData::builder(DataType::Int32)
        .len(16)
        .add_buffer(make_i32_buffer(16))
        .null_bit_buffer(Some(Buffer::from(validity)))
        .build()
        .unwrap();
    let int_slice = int_data.slice(1, 14);
    let int_full_size = int_data.get_slice_memory_size().unwrap();
    let int_slice_size = int_slice.get_slice_memory_size().unwrap();
    // Dropping two Int32 values removes 8 bytes; the bitmap still needs 2 bytes.
    assert_eq!(int_full_size - 8, int_slice_size);

    // Variable-width case: a Utf8 array holding "ab", null and "cde".
    let values = Buffer::from_slice_ref("abcdef".as_bytes());
    let offsets = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
    let nulls = Some(Buffer::from_iter(vec![true, false, true]));
    let utf8_data =
        ArrayData::try_new(DataType::Utf8, 3, nulls, 0, vec![offsets, values], vec![])
            .unwrap();
    let utf8_slice = utf8_data.slice(1, 2);
    let utf8_full_size = utf8_data.get_slice_memory_size().unwrap();
    let utf8_slice_size = utf8_slice.get_slice_memory_size().unwrap();
    // Slicing off the first element removes 4 bytes of offsets and 2 bytes of data.
    assert_eq!(utf8_full_size - 6, utf8_slice_size);
}

#[test]
fn test_count_nulls() {
let null_buffer = Some(Buffer::from(vec![0b00010110, 0b10011111]));
Expand Down