From 2cca8c0d8b96e555c9a39b6d484ab87dd51ab9b1 Mon Sep 17 00:00:00 2001
From: randompast <matteroflight@gmail.com>
Date: Mon, 19 Oct 2020 00:01:51 -0400
Subject: [PATCH 1/5] Implemented convolve{1d2o,1d3o} mode='valid',
 shape={square,cube}, needed={tests,validations,benchmarks,optimizations}

---
 cpp/src/convolution/_convolution.cu           | 218 ++++++++++++++++++
 python/cusignal/__init__.py                   |   2 +
 python/cusignal/convolution/__init__.py       |   2 +
 .../cusignal/convolution/_convolution_cuda.py | 197 ++++++++++++++++
 python/cusignal/convolution/convolve.py       | 186 +++++++++++++++
 5 files changed, 605 insertions(+)
diff --git a/cpp/src/convolution/_convolution.cu b/cpp/src/convolution/_convolution.cu
index 9d6959c6..7dba54ce 100644
--- a/cpp/src/convolution/_convolution.cu
+++ b/cpp/src/convolution/_convolution.cu
@@ -576,3 +576,221 @@ extern "C" __global__ void __launch_bounds__( 256 )
                                   const int pick ) {
     _cupy_correlate2D<thrust::complex<double>>( inp, inpW, inpH, kernel, kerW, kerH, S0, S1, out, outW, outH, pick );
 }
+
+
+///////////////////////////////////////////////////////////////////////////////
+//                              CONVOLVE 1D2O                                //
+///////////////////////////////////////////////////////////////////////////////
+
+template<typename T>
+__device__ void _cupy_convolve1D2O( const T *__restrict__ inp,
+                                const int inpW,
+                                const T *__restrict__ kernel,
+                                const int  kerW,
+                                const int  kerH,
+                                const int  mode,
+                                T *__restrict__ out,
+                                const int outW ) {
+
+    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
+    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };
+
+    for ( int tid = tx; tid < outW; tid += stride ) {
+
+        T temp {};
+
+        if ( mode == 0 ) {  // Valid
+            if ( tid >= 0 && tid < inpW ) {
+                for ( int i = 0; i < kerW; i++ ) {
+                    for ( int j = 0; j < kerH; j++ ) {
+                        temp += inp[tid + kerW - i - 1] * inp[tid + kerH - j - 1] * kernel[ kerW * i + j];
+                    }
+                }
+            }
+        }
+        out[tid] = temp;
+    }
+
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D2O_int32( const int *__restrict__ inp,
+                                                                          const int inpW,
+                                                                          const int *__restrict__ kernel,
+                                                                          const int  kerW,
+                                                                          const int  kerH,
+                                                                          const int  mode,
+                                                                          int *__restrict__ out,
+                                                                          const int outW ) {
+    _cupy_convolve1D2O<int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D2O_int64( const long int *__restrict__ inp,
+                                                                          const int inpW,
+                                                                          const long int *__restrict__ kernel,
+                                                                          const int  kerW,
+                                                                          const int  kerH,
+                                                                          const int  mode,
+                                                                          long int *__restrict__ out,
+                                                                          const int outW ) {
+    _cupy_convolve1D2O<long int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D2O_float32( const float *__restrict__ inp,
+                                                                            const int inpW,
+                                                                            const float *__restrict__ kernel,
+                                                                            const int  kerW,
+                                                                            const int  kerH,
+                                                                            const int  mode,
+                                                                            float *__restrict__ out,
+                                                                            const int outW ) {
+    _cupy_convolve1D2O<float>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D2O_float64( const double *__restrict__ inp,
+                                                                            const int inpW,
+                                                                            const double *__restrict__ kernel,
+                                                                            const int  kerW,
+                                                                            const int  kerH,
+                                                                            const int  mode,
+                                                                            double *__restrict__ out,
+                                                                            const int outW ) {
+    _cupy_convolve1D2O<double>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 )
+    _cupy_convolve1D2O_complex64( thrust::complex<float> *__restrict__ inp,
+                              const int inpW,
+                              thrust::complex<float> *__restrict__ kernel,
+                              const int  kerW,
+                              const int  kerH,
+                              const int  mode,
+                              thrust::complex<float> *__restrict__ out,
+                              const int outW ) {
+    _cupy_convolve1D2O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 )
+    _cupy_convolve1D2O_complex128( const thrust::complex<double> *__restrict__ inp,
+                               const int inpW,
+                               const thrust::complex<double> *__restrict__ kernel,
+                               const int  kerW,
+                               const int  kerH,
+                               const int  mode,
+                               thrust::complex<double> *__restrict__ out,
+                               const int outW ) {
+    _cupy_convolve1D2O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//                              CONVOLVE 1D3O                                //
+///////////////////////////////////////////////////////////////////////////////
+
+template<typename T>
+__device__ void _cupy_convolve1D3O( const T *__restrict__ inp,
+                                const int inpW,
+                                const T *__restrict__ kernel,
+                                const int  kerW,
+                                const int  kerH,
+                                const int  kerD,
+                                const int  mode,
+                                T *__restrict__ out,
+                                const int outW ) {
+
+    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
+    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };
+
+    for ( int tid = tx; tid < outW; tid += stride ) {
+
+        T temp {};
+
+        if ( mode == 0 ) {  // Valid
+            if ( tid >= 0 && tid < inpW ) {
+                for ( int i = 0; i < kerW; i++ ) {
+                    for ( int j = 0; j < kerH; j++ ) {
+                        for ( int k = 0; k < kerD; k++ ) {
+                            temp += inp[tid + kerW - i - 1] * inp[tid + kerH - j - 1] * inp[tid + kerD - k - 1] * kernel[ (kerW * i + j) * kerH + k ];
+                        }
+                    }
+                }
+            }
+        }
+        out[tid] = temp;
+    }
+
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D3O_int32( const int *__restrict__ inp,
+                                                                          const int inpW,
+                                                                          const int *__restrict__ kernel,
+                                                                          const int  kerW,
+                                                                          const int  kerH,
+                                                                          const int  kerD,
+                                                                          const int  mode,
+                                                                          int *__restrict__ out,
+                                                                          const int outW ) {
+    _cupy_convolve1D3O<int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D3O_int64( const long int *__restrict__ inp,
+                                                                          const int inpW,
+                                                                          const long int *__restrict__ kernel,
+                                                                          const int  kerW,
+                                                                          const int  kerH,
+                                                                          const int  kerD,
+                                                                          const int  mode,
+                                                                          long int *__restrict__ out,
+                                                                          const int outW ) {
+    _cupy_convolve1D3O<long int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D3O_float32( const float *__restrict__ inp,
+                                                                            const int inpW,
+                                                                            const float *__restrict__ kernel,
+                                                                            const int  kerW,
+                                                                            const int  kerH,
+                                                                            const int  kerD,
+                                                                            const int  mode,
+                                                                            float *__restrict__ out,
+                                                                            const int outW ) {
+    _cupy_convolve1D3O<float>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 ) _cupy_convolve1D3O_float64( const double *__restrict__ inp,
+                                                                            const int inpW,
+                                                                            const double *__restrict__ kernel,
+                                                                            const int  kerW,
+                                                                            const int  kerH,
+                                                                            const int  kerD,
+                                                                            const int  mode,
+                                                                            double *__restrict__ out,
+                                                                            const int outW ) {
+    _cupy_convolve1D3O<double>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 )
+    _cupy_convolve1D3O_complex64( thrust::complex<float> *__restrict__ inp,
+                              const int inpW,
+                              thrust::complex<float> *__restrict__ kernel,
+                              const int  kerW,
+                              const int  kerH,
+                              const int  kerD,
+                              const int  mode,
+                              thrust::complex<float> *__restrict__ out,
+                              const int outW ) {
+    _cupy_convolve1D3O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
+
+extern "C" __global__ void __launch_bounds__( 512 )
+    _cupy_convolve1D3O_complex128( const thrust::complex<double> *__restrict__ inp,
+                               const int inpW,
+                               const thrust::complex<double> *__restrict__ kernel,
+                               const int  kerW,
+                               const int  kerH,
+                               const int  kerD,
+                               const int  mode,
+                               thrust::complex<double> *__restrict__ out,
+                               const int outW ) {
+    _cupy_convolve1D3O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
+}
diff --git a/python/cusignal/__init__.py b/python/cusignal/__init__.py
index 6a1b4d3c..25dcb09a 100644
--- a/python/cusignal/__init__.py
+++ b/python/cusignal/__init__.py
@@ -40,6 +40,8 @@
     choose_conv_method,
     convolve,
     convolve2d,
+    convolve1d2o,
+    convolve1d3o,
 )
 from cusignal.filter_design.fir_filter_design import (
     kaiser_beta,
diff --git a/python/cusignal/convolution/__init__.py b/python/cusignal/convolution/__init__.py
index f46d4dce..c42f4229 100644
--- a/python/cusignal/convolution/__init__.py
+++ b/python/cusignal/convolution/__init__.py
@@ -16,5 +16,7 @@
     fftconvolve,
     convolve2d,
     choose_conv_method,
+    convolve1d2o,
+    convolve1d3o,
 )
 from cusignal.convolution.correlate import correlate, correlate2d
diff --git a/python/cusignal/convolution/_convolution_cuda.py b/python/cusignal/convolution/_convolution_cuda.py
index dff444cd..edf2baa9 100644
--- a/python/cusignal/convolution/_convolution_cuda.py
+++ b/python/cusignal/convolution/_convolution_cuda.py
@@ -73,6 +73,73 @@ def __call__(
         self.kernel(self.grid, self.block, kernel_args)
 
 
+class _cupy_convolve_1d2o_wrapper(object):
+    def __init__(self, grid, block, kernel):
+        if isinstance(grid, int):
+            grid = (grid,)
+        if isinstance(block, int):
+            block = (block,)
+
+        self.grid = grid
+        self.block = block
+        self.kernel = kernel
+
+    def __call__(
+        self,
+        d_inp,
+        d_kernel,
+        mode,
+        out,
+    ):
+
+        kernel_args = (
+            d_inp,
+            d_inp.shape[0],
+            d_kernel,
+            d_kernel.shape[0],
+            d_kernel.shape[1],
+            mode,
+            out,
+            out.shape[0],
+        )
+
+        self.kernel(self.grid, self.block, kernel_args)
+
+
+class _cupy_convolve_1d3o_wrapper(object):
+    def __init__(self, grid, block, kernel):
+        if isinstance(grid, int):
+            grid = (grid,)
+        if isinstance(block, int):
+            block = (block,)
+
+        self.grid = grid
+        self.block = block
+        self.kernel = kernel
+
+    def __call__(
+        self,
+        d_inp,
+        d_kernel,
+        mode,
+        out,
+    ):
+
+        kernel_args = (
+            d_inp,
+            d_inp.shape[0],
+            d_kernel,
+            d_kernel.shape[0],
+            d_kernel.shape[1],
+            d_kernel.shape[2],
+            mode,
+            out,
+            out.shape[0],
+        )
+
+        self.kernel(self.grid, self.block, kernel_args)
+
+
 class _cupy_convolve_2d_wrapper(object):
     def __init__(self, grid, block, kernel):
         if isinstance(grid, int):
@@ -145,6 +212,10 @@ def _get_backend_kernel(
             return _cupy_convolve_wrapper(grid, block, kernel)
         elif k_type == "convolve2D" or k_type == "correlate2D":
             return _cupy_convolve_2d_wrapper(grid, block, kernel)
+        elif k_type == "convolve1D2O":
+            return _cupy_convolve_1d2o_wrapper(grid, block, kernel)
+        elif k_type == "convolve1D3O":
+            return _cupy_convolve_1d3o_wrapper(grid, block, kernel)
         else:
             raise NotImplementedError(
                 "No CuPY kernel found for k_type {}, datatype {}".format(
@@ -439,3 +510,129 @@ def _convolve2d(in1, in2, use_convolve, mode, boundary, fillvalue):
     )
 
     return out
+
+
+def _convolve1d2o_gpu(
+    inp,
+    out,
+    ker,
+    mode,
+):
+
+    d_inp = cp.array(inp)
+    d_kernel = cp.array(ker)
+
+    threadsperblock, blockspergrid = _get_tpb_bpg()
+
+    k_type = "convolve1D2O"
+
+    _populate_kernel_cache(out.dtype, k_type)
+
+    kernel = _get_backend_kernel(
+        out.dtype,
+        blockspergrid,
+        threadsperblock,
+        k_type,
+    )
+
+    kernel(d_inp, d_kernel, mode, out)
+
+    _print_atts(kernel)
+
+    return out
+
+
+def _convolve1d2o(in1, in2, mode):
+
+    val = _valfrommode(mode)
+
+    # Promote inputs
+    promType = cp.promote_types(in1.dtype, in2.dtype)
+    in1 = in1.astype(promType)
+    in2 = in2.astype(promType)
+
+    # Create empty array to hold number of aout dimensions
+    out_dimens = np.empty(in1.ndim, np.int)
+    if val == VALID:
+        for i in range(in1.ndim):
+            out_dimens[i] = in1.shape[i] - in2.shape[i] + 1
+            if out_dimens[i] < 0:
+                raise Exception(
+                    "no part of the output is valid, use option 1 (same) or 2 \
+                     (full) for third argument"
+                )
+
+    # Create empty array out on GPU
+    out = cp.empty(out_dimens.tolist(), in1.dtype)
+
+    out = _convolve1d2o_gpu(
+        in1,
+        out,
+        in2,
+        val,
+    )
+
+    return out
+
+
+def _convolve1d3o_gpu(
+    inp,
+    out,
+    ker,
+    mode,
+):
+
+    d_inp = cp.array(inp)
+    d_kernel = cp.array(ker)
+
+    threadsperblock, blockspergrid = _get_tpb_bpg()
+
+    k_type = "convolve1D3O"
+
+    _populate_kernel_cache(out.dtype, k_type)
+
+    kernel = _get_backend_kernel(
+        out.dtype,
+        blockspergrid,
+        threadsperblock,
+        k_type,
+    )
+
+    kernel(d_inp, d_kernel, mode, out)
+
+    _print_atts(kernel)
+
+    return out
+
+
+def _convolve1d3o(in1, in2, mode):
+
+    val = _valfrommode(mode)
+
+    # Promote inputs
+    promType = cp.promote_types(in1.dtype, in2.dtype)
+    in1 = in1.astype(promType)
+    in2 = in2.astype(promType)
+
+    # Create empty array to hold number of aout dimensions
+    out_dimens = np.empty(in1.ndim, np.int)
+    if val == VALID:
+        for i in range(in1.ndim):
+            out_dimens[i] = in1.shape[i] - in2.shape[i] + 1
+            if out_dimens[i] < 0:
+                raise Exception(
+                    "no part of the output is valid, use option 1 (same) or 2 \
+                     (full) for third argument"
+                )
+
+    # Create empty array out on GPU
+    out = cp.empty(out_dimens.tolist(), in1.dtype)
+
+    out = _convolve1d3o_gpu(
+        in1,
+        out,
+        in2,
+        val,
+    )
+
+    return out
\ No newline at end of file
diff --git a/python/cusignal/convolution/convolve.py b/python/cusignal/convolution/convolve.py
index ad0fb571..3a80baa3 100644
--- a/python/cusignal/convolution/convolve.py
+++ b/python/cusignal/convolution/convolve.py
@@ -526,3 +526,189 @@ def choose_conv_method(in1, in2, mode="full", measure=False):
             return "fft"
 
     return "direct"
+
+
+def convolve1d2o(
+    in1,
+    in2,
+    mode="valid",
+    method="direct",
+):
+    """
+    Convolve a 1-dimensional arrays with a 2nd order filter.
+    This results in a second order convolution.
+
+    Convolve `in1` and `in2`, with the output size determined by the
+    `mode` argument.
+
+    Parameters
+    ----------
+    in1 : array_like
+        First input.
+    in2 : array_like
+        Second input. Should have the same number of dimensions as `in1`.
+    mode : str {'full', 'valid', 'same'}, optional
+        A string indicating the size of the output:
+
+        ``full``
+           The output is the full discrete linear convolution
+           of the inputs. (Default)
+        ``valid``
+           The output consists only of those elements that do not
+           rely on the zero-padding. In 'valid' mode, either `in1` or `in2`
+           must be at least as large as the other in every dimension.
+        ``same``
+           The output is the same size as `in1`, centered
+           with respect to the 'full' output.
+    method : str {'auto', 'direct', 'fft'}, optional
+        A string indicating which method to use to calculate the convolution.
+
+        ``direct``
+           The convolution is determined directly from sums, the definition of
+           convolution.
+        ``fft``
+           The Fourier Transform is used to perform the convolution by calling
+           `fftconvolve`.
+        ``auto``
+           Automatically chooses direct or Fourier method based on an estimate
+           of which is faster (default).
+
+    Returns
+    -------
+    out : ndarray
+        A 1-dimensional array containing a subset of the discrete linear
+        convolution of `in1` with `in2`.
+
+    See Also
+    --------
+    convolve
+    convolve1d2o
+    convolve1d3o
+    
+    Examples
+    --------
+    Convolution of a 2nd order filter on a 1d signal
+
+    >>> import cusignal as cs
+    >>> import numpy as np
+    >>> d = 50
+    >>> a = np.random.uniform(-1,1,(200))
+    >>> b = np.random.uniform(-1,1,(d,d))
+    >>> c = cs.convolve1d3o(a,b)
+
+    """
+
+    signal = cp.asarray(in1)
+    kernel = cp.asarray(in2)
+
+    if mode == "valid" and signal.shape[0] < kernel.shape[0]:
+        # Convolution is commutative
+        # order doesn't have any effect on output
+        signal, kernel = kernel, signal
+
+    if mode in ["same", "full"]:
+        raise NotImplementedError(
+            "Mode == {} not implemented".format(
+                mode
+            )
+        )
+
+    if method == "direct":
+        return _convolution_cuda._convolve1d2o(
+            signal, kernel, mode
+        )
+    else:
+        raise NotImplementedError("Only Direct method implemented")
+
+
+def convolve1d3o(
+    in1,
+    in2,
+    mode="valid",
+    method="direct",
+):
+    """
+    Convolve a 1-dimensional array with a 3rd order filter.
+    This results in a second order convolution.
+
+    Convolve `in1` and `in2`, with the output size determined by the
+    `mode` argument.
+
+    Parameters
+    ----------
+    in1 : array_like
+        First input.
+    in2 : array_like
+        Second input. Should have the same number of dimensions as `in1`.
+    mode : str {'full', 'valid', 'same'}, optional
+        A string indicating the size of the output:
+
+        ``full``
+           The output is the full discrete linear convolution
+           of the inputs. (Default)
+        ``valid``
+           The output consists only of those elements that do not
+           rely on the zero-padding. In 'valid' mode, either `in1` or `in2`
+           must be at least as large as the other in every dimension.
+        ``same``
+           The output is the same size as `in1`, centered
+           with respect to the 'full' output.
+    method : str {'auto', 'direct', 'fft'}, optional
+        A string indicating which method to use to calculate the convolution.
+
+        ``direct``
+           The convolution is determined directly from sums, the definition of
+           convolution.
+        ``fft``
+           The Fourier Transform is used to perform the convolution by calling
+           `fftconvolve`.
+        ``auto``
+           Automatically chooses direct or Fourier method based on an estimate
+           of which is faster (default).
+
+    Returns
+    -------
+    out : ndarray
+        A 1-dimensional array containing a subset of the discrete linear
+        convolution of `in1` with `in2`.
+
+    See Also
+    --------
+    convolve
+    convolve1d2o
+    convolve1d3o
+
+    Examples
+    --------
+    Convolution of a 3rd order filter on a 1d signal
+
+    >>> import cusignal as cs
+    >>> import numpy as np
+    >>> d = 50
+    >>> a = np.random.uniform(-1,1,(200))
+    >>> b = np.random.uniform(-1,1,(d,d,d))
+    >>> c = cs.convolve1d3o(a,b)
+
+    """
+
+    signal = cp.asarray(in1)
+    kernel = cp.asarray(in2)
+
+    if mode == "valid" and signal.shape[0] < kernel.shape[0]:
+        # Convolution is commutative
+        # order doesn't have any effect on output
+        signal, kernel = kernel, signal
+
+    if mode in ["same", "full"]:
+        raise NotImplementedError(
+            "Mode == {} not implemented".format(
+                mode
+            )
+        )
+
+    if method == "direct":
+        return _convolution_cuda._convolve1d3o(
+            signal, kernel, mode
+        )
+    else:
+        raise NotImplementedError("Only Direct method implemented")

From 87cdfea5f091d99c5b59352363d8de42267d19b6 Mon Sep 17 00:00:00 2001
From: randompast <matteroflight@gmail.com>
Date: Sat, 24 Oct 2020 01:01:50 -0400
Subject: [PATCH 2/5] changed changedlog.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db633072..1e72448e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## New Features
 - PR #241 - Add inverse_complex_cepstrum and minimum_phase to acoustics module
+- PR #270 - Add convolve1d2o and convolve1d3o
 
 
 ## Improvements

From b8869d36c7b42a217c0f35c1bc9a9d03e3e11dd1 Mon Sep 17 00:00:00 2001
From: randompast <matteroflight@gmail.com>
Date: Sat, 24 Oct 2020 01:05:07 -0400
Subject: [PATCH 3/5] improved #270 changelog.md message

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e72448e..c7d5ca54 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 ## New Features
 - PR #241 - Add inverse_complex_cepstrum and minimum_phase to acoustics module
-- PR #270 - Add convolve1d2o and convolve1d3o
+- PR #270 - Add second and third order convolutions as convolve1d2o and convolve1d3o 
 
 
 ## Improvements

From efe63ab523c50b579bb8ea06127e66b5c82dab7e Mon Sep 17 00:00:00 2001
From: BradReesWork <BradReesWork@users.noreply.github.com>
Date: Wed, 18 Nov 2020 09:12:16 -0500
Subject: [PATCH 4/5] style fix

---
 python/cusignal/convolution/_convolution_cuda.py | 3 ++-
 python/cusignal/convolution/convolve.py          | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cusignal/convolution/_convolution_cuda.py b/python/cusignal/convolution/_convolution_cuda.py
index edf2baa9..8fa88228 100644
--- a/python/cusignal/convolution/_convolution_cuda.py
+++ b/python/cusignal/convolution/_convolution_cuda.py
@@ -635,4 +635,5 @@ def _convolve1d3o(in1, in2, mode):
         val,
     )
 
-    return out
\ No newline at end of file
+    return out
+
diff --git a/python/cusignal/convolution/convolve.py b/python/cusignal/convolution/convolve.py
index 3a80baa3..68c1fa2c 100644
--- a/python/cusignal/convolution/convolve.py
+++ b/python/cusignal/convolution/convolve.py
@@ -584,7 +584,7 @@ def convolve1d2o(
     convolve
     convolve1d2o
     convolve1d3o
-    
+
     Examples
     --------
     Convolution of a 2nd order filter on a 1d signal

From ef3756db7ae31a1cd24d92a0bc7f83468af34286 Mon Sep 17 00:00:00 2001
From: BradReesWork <BradReesWork@users.noreply.github.com>
Date: Wed, 18 Nov 2020 09:14:18 -0500
Subject: [PATCH 5/5] style

---
 python/cusignal/convolution/_convolution_cuda.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cusignal/convolution/_convolution_cuda.py b/python/cusignal/convolution/_convolution_cuda.py
index 8fa88228..501cc6ce 100644
--- a/python/cusignal/convolution/_convolution_cuda.py
+++ b/python/cusignal/convolution/_convolution_cuda.py
@@ -636,4 +636,3 @@ def _convolve1d3o(in1, in2, mode):
     )
 
     return out
-