Skip to content

Commit

Permalink
Made len/len_proxy consistent with Offsets (jorgecarleitao#1434)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 committed Apr 5, 2023
1 parent 9f9067a commit dbd0228
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 17 deletions.
4 changes: 2 additions & 2 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl<O: Offset> BinaryArray<O> {

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len())
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
Expand Down Expand Up @@ -127,7 +127,7 @@ impl<O: Offset> BinaryArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len()
self.offsets.len_proxy()
}

/// Returns the element at index `i`
Expand Down
4 changes: 2 additions & 2 deletions src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl<O: Offset> ListArray<O> {

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len())
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
Expand Down Expand Up @@ -138,7 +138,7 @@ impl<O: Offset> ListArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len()
self.offsets.len_proxy()
}

/// Returns the element at index `i`
Expand Down
4 changes: 2 additions & 2 deletions src/array/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl MapArray {

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len())
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
Expand Down Expand Up @@ -154,7 +154,7 @@ impl MapArray {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len()
self.offsets.len_proxy()
}

/// returns the offsets
Expand Down
6 changes: 3 additions & 3 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ impl<O: Offset> Utf8Array<O> {
try_check_utf8(&offsets, &values)?;
if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len())
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
Expand Down Expand Up @@ -144,7 +144,7 @@ impl<O: Offset> Utf8Array<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len()
self.offsets.len_proxy()
}

/// Returns the value of the element at index `i`, ignoring the array's validity.
Expand Down Expand Up @@ -365,7 +365,7 @@ impl<O: Offset> Utf8Array<O> {

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len())
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
Expand Down
6 changes: 3 additions & 3 deletions src/compute/aggregate/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize {
List => {
let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
estimated_bytes_size(array.values().as_ref())
+ array.offsets().len() * std::mem::size_of::<i32>()
+ array.offsets().len_proxy() * std::mem::size_of::<i32>()
+ validity_size(array.validity())
}
FixedSizeList => {
Expand All @@ -73,7 +73,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize {
LargeList => {
let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
estimated_bytes_size(array.values().as_ref())
+ array.offsets().len() * std::mem::size_of::<i64>()
+ array.offsets().len_proxy() * std::mem::size_of::<i64>()
+ validity_size(array.validity())
}
Struct => {
Expand Down Expand Up @@ -111,7 +111,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize {
}),
Map => {
let array = array.as_any().downcast_ref::<MapArray>().unwrap();
let offsets = array.offsets().len() * std::mem::size_of::<i32>();
let offsets = array.offsets().len_proxy() * std::mem::size_of::<i32>();
offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/io/parquet/write/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ impl Nested {
pub fn len(&self) -> usize {
match self {
Nested::Primitive(_, _, length) => *length,
Nested::List(nested) => nested.offsets.len(),
Nested::LargeList(nested) => nested.offsets.len(),
Nested::List(nested) => nested.offsets.len_proxy(),
Nested::LargeList(nested) => nested.offsets.len_proxy(),
Nested::Struct(_, _, len) => *len,
}
}
Expand Down
12 changes: 9 additions & 3 deletions src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,18 @@ impl<O: Offset> OffsetsBuffer<O> {
&self.0
}

/// Returns the length of this container
/// Returns the length that an array with these offsets would have (the number of offsets minus one).
#[inline]
pub fn len(&self) -> usize {
pub fn len_proxy(&self) -> usize {
self.0.len() - 1
}

/// Returns the number of offsets in this container.
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}

/// Returns the slice of offsets stored in this buffer
#[inline]
pub fn as_slice(&self) -> &[O] {
Expand Down Expand Up @@ -419,7 +425,7 @@ impl<O: Offset> OffsetsBuffer<O> {
#[inline]
pub fn start_end(&self, index: usize) -> (usize, usize) {
// soundness: the invariant of the function
assert!(index < self.len());
assert!(index < self.len_proxy());
unsafe { self.start_end_unchecked(index) }
}

Expand Down

0 comments on commit dbd0228

Please sign in to comment.