Skip to content

Commit

Permalink
Optimize copying of non-contiguous tensors with 5+ dimensions
Browse files Browse the repository at this point in the history
Improve the code path for tensors with 5+ dimensions in `TensorBase::init_from`.
Instead of falling back to slow element-by-element iteration via `TensorBase::iter`,
iterate over the 4-dimensional inner views and copy each one using the faster code
path that handles tensors with at most 4 dimensions.
  • Loading branch information
robertknight committed Nov 15, 2024
1 parent 408ebe0 commit bf73c6f
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions rten-tensor/src/copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,14 @@ pub fn copy_into_slice<'a, T: Clone>(
src.merge_axes();

if src.ndim() > 4 {
for (dst, src) in dest.iter_mut().zip(src.iter()) {
dst.write(src.clone());
let chunk_size = src.shape()[src.ndim() - 4..].iter().product();
let mut n_init = 0;
for (src, dest) in src.inner_iter::<4>().zip(dest.chunks_mut(chunk_size)) {
copy_into_slice(src.as_dyn(), dest);
n_init += chunk_size;
}
assert!(n_init == dest.len());

// Safety: Loop above initialized all elements of `dest`.
return unsafe { transmute::<&mut [MaybeUninit<T>], &[T]>(dest) };
}
Expand Down

0 comments on commit bf73c6f

Please sign in to comment.