Skip to content

Commit

Permalink
less copy
Browse files: browse the repository at this point in the history
  • Loading branch information
alamb committed Jan 28, 2025
1 parent b659c96 commit d87a00c
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions arrow-select/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,15 @@ fn concat_lists<OffsetSize: OffsetSizeTrait>(
) -> Result<ArrayRef, ArrowError> {
let mut output_len = 0;
let mut list_has_nulls = false;
let mut list_has_slices = false;

let lists = arrays
.iter()
.map(|x| x.as_list::<OffsetSize>())
.inspect(|l| {
output_len += l.len();
list_has_nulls |= l.null_count() != 0;
list_has_slices |= l.offsets()[0].as_usize() > 0;
})
.collect::<Vec<_>>();

Expand All @@ -156,19 +158,23 @@ fn concat_lists<OffsetSize: OffsetSizeTrait>(
NullBuffer::new(nulls.finish())
});

let values: Vec<ArrayRef> = lists
.iter()
.map(|x| {
// offsets may not refer to all values in the array
// for example a sliced list array may have offsets that start at
// a non-zero offset and not use all the values
let offsets = x.offsets();
// If any of the lists have slices, we need to slice the values
// to ensure that the offsets are correct
let mut sliced_values;
let values: Vec<&dyn Array> = if list_has_slices {
sliced_values = Vec::with_capacity(lists.len());
for l in &lists {
// if the first offset is non-zero, we need to slice the values so when
// we concatenate them below only the relevant values are included
let offsets = l.offsets();
let start_offset = offsets[0].as_usize();
let end_offset = offsets[offsets.len() - 1].as_usize();
x.values().slice(start_offset, end_offset - start_offset)
})
.collect::<Vec<_>>();
let values: Vec<&dyn Array> = values.iter().map(|a| a.as_ref()).collect();
sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
}
sliced_values.iter().map(|a| a.as_ref()).collect()
} else {
lists.iter().map(|x| x.values().as_ref()).collect()
};

let concatenated_values = concat(values.as_slice())?;

Expand Down

0 comments on commit d87a00c

Please sign in to comment.