Update and clarify some comments

kcat · Oct 7, 2023 · 5ed78a9 · 5ed78a9
1 parent 393790d
commit 5ed78a9
Show file tree

Hide file tree

Showing 2 changed files with 93 additions and 91 deletions.
diff --git a/common/pffft.cpp b/common/pffft.cpp
@@ -79,8 +79,7 @@
 #endif
 
 
-/*
- * vector support macros: the rest of the code is independant of
+/* Vector support macros: the rest of the code is independent of
  * SSE/Altivec/NEON -- adding support for other platforms with 4-element
  * vectors should be limited to these macros
  */
@@ -248,7 +247,7 @@ typedef float v4sf;
 #define VALIGNED(ptr) ((reinterpret_cast<uintptr_t>(ptr) & 0x3) == 0)
 #endif
 
-// shortcuts for complex multiplcations
+// shortcuts for complex multiplications
 #define VCPLXMUL(ar,ai,br,bi) do { v4sf tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMADD(ai,br,tmp); } while(0)
 #define VCPLXMULCONJ(ar,ai,br,bi) do { v4sf tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VMADD(ai,bi,ar); ai=VSUB(VMUL(ai,br),tmp); } while(0)
 #ifndef SVMUL
@@ -1866,6 +1865,8 @@ void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *fo
 void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab,
     float scaling)
 {
+    assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
+
     const int Ncvec = s->Ncvec;
     const v4sf *RESTRICT va = reinterpret_cast<const v4sf*>(a);
     const v4sf *RESTRICT vb = reinterpret_cast<const v4sf*>(b);
@@ -1892,7 +1893,6 @@ void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b,
 #ifndef ZCONVOLVE_USING_INLINE_ASM
     const v4sf vscal = LD_PS1(scaling);
 #endif
-    assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
     float ar1 = reinterpret_cast<const v4sf_union*>(va)[0].f[0];
     float ai1 = reinterpret_cast<const v4sf_union*>(va)[1].f[0];
     float br1 = reinterpret_cast<const v4sf_union*>(vb)[0].f[0];

diff --git a/common/pffft.h b/common/pffft.h
@@ -45,34 +45,36 @@
    SOFTWARE.
 */
 
-/*
-   PFFFT : a Pretty Fast FFT.
-
-   This is basically an adaptation of the single precision fftpack
-   (v4) as found on netlib taking advantage of SIMD instruction found
-   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
-
-   For architectures where no SIMD instruction is available, the code
-   falls back to a scalar version.
-
-   Restrictions:
-
-   - 1D transforms only, with 32-bit single precision.
-
-   - supports only transforms for inputs of length N of the form
-   N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
-   144, 160, etc are all acceptable lengths). Performance is best for
-   128<=N<=8192.
-
-   - all (float*) pointers in the functions below are expected to
-   have an "simd-compatible" alignment, that is 16 bytes on x86 and
-   powerpc CPUs.
-
-   You can allocate such buffers with the functions
-   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
-   posix_memalign..)
-
-*/
+/* PFFFT : a Pretty Fast FFT.
+ *
+ * This is basically an adaptation of the single precision fftpack (v4) as
+ * found on netlib taking advantage of SIMD instructions found on CPUs such as
+ * Intel x86 (SSE1), PowerPC (Altivec), and Arm (NEON).
+ *
+ * For architectures where SIMD instructions aren't available, the code falls
+ * back to a scalar version.
+ *
+ * Restrictions:
+ *
+ * - 1D transforms only, with 32-bit single precision.
+ *
+ * - supports only transforms for inputs of length N of the form
+ * N=(2^a)*(3^b)*(5^c), given a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, 144,
+ * 160, etc are all acceptable lengths). Performance is best for 128<=N<=8192.
+ *
+ * - all (float*) pointers for the functions below are expected to have a
+ * "SIMD-compatible" alignment, that is 16 bytes.
+ *
+ * You can allocate such buffers with the pffft_aligned_malloc function, and
+ * deallocate them with pffft_aligned_free (or with stuff like posix_memalign,
+ * aligned_alloc, etc).
+ *
+ * Note that for the z-domain data of real transforms, when in the canonical
+ * order (as interleaved complex numbers) both 0-frequency and half-frequency
+ * components, which are real, are assembled in the first entry as
+ * F(0)+i*F(n/2+1). The original fftpack placed F(n/2+1) at the end of the
+ * arrays instead.
+ */
 
 #ifndef PFFFT_H
 #define PFFFT_H
@@ -100,77 +102,77 @@ typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
 
 #endif
 
-/*
-   prepare for performing transforms of size N -- the returned
-   PFFFT_Setup structure is read-only so it can safely be shared by
-   multiple concurrent threads.
-*/
+/**
+ * Prepare for performing transforms of size N -- the returned PFFFT_Setup
+ * structure is read-only so it can safely be shared by multiple concurrent
+ * threads.
+ */
 PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
-void pffft_destroy_setup(PFFFT_Setup *);
-/*
-   Perform a Fourier transform , The z-domain data is stored in the
-   most efficient order for transforming it back, or using it for
-   convolution. If you need to have its content sorted in the
-   "usual" way, that is as an array of interleaved complex numbers,
-   either use pffft_transform_ordered , or call pffft_zreorder after
-   the forward fft, and before the backward fft.
-
-   Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
-   Typically you will want to scale the backward transform by 1/N.
-
-   The 'work' pointer must point to an area of N (2*N for complex
-   fft) floats, properly aligned. It cannot be NULL.
-
-   input and output may alias.
-*/
+void pffft_destroy_setup(PFFFT_Setup *setup);
+
+/**
+ * Perform a Fourier transform. The z-domain data is stored in the most
+ * efficient order for transforming back or using for convolution, and as
+ * such, there's no guarantee to the order of the values. If you need to have
+ * its content sorted in the usual way, that is as an array of interleaved
+ * complex numbers, either use pffft_transform_ordered, or call pffft_zreorder
+ * after the forward fft and before the backward fft.
+ *
+ * Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. Typically
+ * you will want to scale the backward transform by 1/N.
+ *
+ * The 'work' pointer must point to an area of N (2*N for complex fft) floats,
+ * properly aligned. It cannot be NULL.
+ *
+ * The input and output parameters may alias.
+ */
 void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 
-/*
-   Similar to pffft_transform, but makes sure that the output is
-   ordered as expected (interleaved complex numbers).  This is
-   similar to calling pffft_transform and then pffft_zreorder.
-
-   input and output may alias.
-*/
+/**
+ * Similar to pffft_transform, but handles the complex values in the usual form
+ * (interleaved complex numbers). This is similar to calling
+ * pffft_transform(..., PFFFT_FORWARD) followed by
+ * pffft_zreorder(..., PFFFT_FORWARD), or
+ * pffft_zreorder(..., PFFFT_BACKWARD) followed by
+ * pffft_transform(..., PFFFT_BACKWARD), for the given direction.
+ *
+ * The input and output parameters may alias.
+ */
 void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 
-/*
-   call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
-   PFFFT_FORWARD) if you want to have the frequency components in
-   the correct "canonical" order, as interleaved complex numbers.
-
-   (for real transforms, both 0-frequency and half frequency
-   components, which are real, are assembled in the first entry as
-   F(0)+i*F(n/2+1). Note that the original fftpack did place
-   F(n/2+1) at the end of the arrays).
-
-   input and output should not alias.
-*/
+/**
+ * Reorder the z-domain data. For PFFFT_FORWARD, it reorders from the internal
+ * representation to the "canonical" order (as interleaved complex numbers).
+ * For PFFFT_BACKWARD, it reorders from the canonical order to the internal
+ * order suitable for pffft_transform(..., PFFFT_BACKWARD) or
+ * pffft_zconvolve_accumulate.
+ *
+ * The input and output parameters should not alias.
+ */
 void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
 
-/*
-   Perform a multiplication of the frequency components of dft_a and
-   dft_b and accumulate them into dft_ab. The arrays should have
-   been obtained with pffft_transform(.., PFFFT_FORWARD) and should
-   *not* have been reordered with pffft_zreorder (otherwise just
-   perform the operation yourself as the dft coefs are stored as
-   interleaved complex numbers).
-
-   the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
-
-   The dft_a, dft_b and dft_ab pointers may alias.
-*/
+/**
+ * Perform a multiplication of the z-domain data in dft_a and dft_b and
+ * accumulate them into dft_ab. The arrays should have been obtained with
+ * pffft_transform(..., PFFFT_FORWARD) or pffft_zreorder(..., PFFFT_BACKWARD)
+ * and should *not* be in the usual order (otherwise just perform the operation
+ * yourself as the dft coeffs are stored as interleaved complex numbers).
+ *
+ * The operation performed is: dft_ab += (dft_a * dft_b)*scaling
+ *
+ * The dft_a, dft_b, and dft_ab parameters may alias.
+ */
 void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
 
-/*
-   the float buffers must have the correct alignment (16-byte boundary
-   on intel and powerpc). This function may be used to obtain such
-   correctly aligned buffers.
-*/
+/**
+ * The float buffers must have the correct alignment (16-byte boundary on intel
+ * and powerpc). This function may be used to obtain such correctly aligned
+ * buffers.
+ */
 void *pffft_aligned_malloc(size_t nb_bytes);
 void pffft_aligned_free(void *);
 
-/* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
+/* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */
 int pffft_simd_size();
 
 #ifdef __cplusplus