robertknight · robertknight · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/rten-vecmath/src/shift_scale.rs b/rten-vecmath/src/shift_scale.rs
@@ -20,6 +20,11 @@ impl<'a> SimdOp for SimdShiftScale<'a> {
             const_scale,
         } = self;
 
+        assert_eq!(scale.len(), data.len());
+        if let Some(bias) = bias {
+            assert_eq!(bias.len(), data.len());
+        }
+
         let mut out_ptr = data.as_mut_ptr();
         let mut scale_ptr = scale.as_ptr();
         let mut bias_ptr = bias.map(|b| b.as_ptr());
@@ -54,16 +59,20 @@ impl<'a> SimdOp for SimdShiftScale<'a> {
 
 /// Shift and scale each element in the input.
 ///
-/// This scales and shifts each element using `y[i] = y[i] * const_scale *
-/// scale[i] + bias[i]`.
+/// This updates each element in `xs` according to the formula
+/// `xs[i] = xs[i] * const_scale * scale[i] + bias[i]`.
+///
+/// # Panics
+///
+/// Panics if the length of `scale`, or of `bias` when it is `Some`, does not match `xs`.
 pub fn vec_shift_scale_in_place(
-    data: &mut [f32],
+    xs: &mut [f32],
     const_scale: f32,
     scale: &[f32],
     bias: Option<&[f32]>,
 ) {
     let simd_op = SimdShiftScale {
-        data,
+        data: xs,
         bias,
         scale,
         const_scale,

diff --git a/rten-vecmath/src/sum.rs b/rten-vecmath/src/sum.rs
@@ -22,7 +22,12 @@ impl SimdOp for SimdSum<'_> {
     }
 }
 
-/// Return the sum of a slice of floats.
+/// Compute the sum of a slice of floats.
+///
+/// This is more efficient than `xs.iter().sum()` as it computes multiple
+/// partial sums in parallel using SIMD and then sums across the SIMD lanes at
+/// the end. The result may differ very slightly from a sequential sum because
+/// the additions happen in a different order.
 pub fn vec_sum(xs: &[f32]) -> f32 {
     let op = SimdSum { input: xs };
     dispatch(op)
@@ -48,7 +53,12 @@ impl SimdOp for SimdSumSquare<'_> {
     }
 }
 
-/// Return the sum of the squares of elements in `xs`.
+/// Compute the sum of the squares of elements in `xs`.
+///
+/// Conceptually this is like `xs.iter().map(|&x| x * x).sum()` but more
+/// efficient as it computes multiple partial sums in parallel and then sums
+/// across SIMD lanes at the end. The result may also differ slightly from the
+/// sequential version because the additions happen in a different order.
 pub fn vec_sum_square(xs: &[f32]) -> f32 {
     let op = SimdSumSquare { input: xs };
     dispatch(op)

diff --git a/rten-vecmath/src/tanh.rs b/rten-vecmath/src/tanh.rs
@@ -7,6 +7,7 @@ use rten_simd::SimdFloat;
 
 use crate::exp::simd_exp;
 
+/// Compute `x.tanh()` using the same algorithm as [`vec_tanh`].
 pub fn tanh(x: f32) -> f32 {
     unsafe { simd_tanh(x) }
 }
@@ -81,6 +82,7 @@ pub fn vec_tanh(xs: &[f32], out: &mut [MaybeUninit<f32>]) {
     dispatch_map_op(xs, out, SimdTanh {});
 }
 
+/// Variant of [`vec_tanh`] which modifies elements in-place.
 pub fn vec_tanh_in_place(xs: &mut [f32]) {
     dispatch_map_op_in_place(xs, SimdTanh {});
 }