Skip to content
This repository has been archived by the owner on Sep 25, 2023. It is now read-only.

Commit

Permalink
Merge pull request #270 from randompast/convolve_1d_2o3o
Browse files Browse the repository at this point in the history
[REVIEW] Implemented convolve1d{2/3}o 'valid', needed={tests,benchmarks,optimizations}
  • Loading branch information
BradReesWork authored Nov 18, 2020
2 parents a3e5293 + ef3756d commit 53bdc01
Show file tree
Hide file tree
Showing 6 changed files with 606 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## New Features
- PR #241 - Add inverse_complex_cepstrum and minimum_phase to acoustics module
- PR #270 - Add second and third order convolutions as convolve1d2o and convolve1d3o

## Improvements
- PR #267 - Various optimization across all functions
Expand Down
218 changes: 218 additions & 0 deletions cpp/src/convolution/_convolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -576,3 +576,221 @@ extern "C" __global__ void __launch_bounds__( 256 )
const int pick ) {
_cupy_correlate2D<thrust::complex<double>>( inp, inpW, inpH, kernel, kerW, kerH, S0, S1, out, outW, outH, pick );
}


///////////////////////////////////////////////////////////////////////////////
// CONVOLVE 1D2O //
///////////////////////////////////////////////////////////////////////////////

// Second-order 1D convolution ("valid" mode only).
//
// For every output index tid in [0, outW) this computes
//
//   out[tid] = sum_{i < kerW} sum_{j < kerH}
//              inp[tid + kerW - 1 - i] * inp[tid + kerH - 1 - j] * kernel[i * kerH + j]
//
// Parameters
//   inp     input samples, inpW elements
//   inpW    number of elements in inp
//   kernel  dense second-order kernel, indexed as kernel[i * kerH + j]
//           with i in [0, kerW) and j in [0, kerH)
//   kerW    extent of the first kernel axis
//   kerH    extent of the second kernel axis
//   mode    0 selects "valid"; any other value writes zeros
//   out     output buffer, outW elements
//   outW    number of elements to produce
//
// Work is distributed with a grid-stride loop, so any 1D launch configuration
// covers all outW outputs.
//
// An output whose sliding window would read past the end of inp is written as
// a value-initialised T (zero) instead of reading out of bounds.
template<typename T>
__device__ void _cupy_convolve1D2O( const T *__restrict__ inp,
                                    const int inpW,
                                    const T *__restrict__ kernel,
                                    const int kerW,
                                    const int kerH,
                                    const int mode,
                                    T *__restrict__ out,
                                    const int outW ) {

    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };

    // Largest offset (relative to tid) that the loops below read from inp.
    const int reach { ( ( kerW > kerH ) ? kerW : kerH ) - 1 };

    for ( int tid = tx; tid < outW; tid += stride ) {

        T temp {};

        if ( mode == 0 ) {  // Valid
            // The whole window [tid, tid + reach] must lie inside inp.
            if ( tid >= 0 && tid + reach < inpW ) {
                for ( int i = 0; i < kerW; i++ ) {
                    // Invariant across the j loop, so load it once.
                    const T in_i = inp[tid + kerW - i - 1];
                    for ( int j = 0; j < kerH; j++ ) {
                        // Row length of the flattened kernel is kerH (the
                        // extent of j), not kerW.
                        temp += in_i * inp[tid + kerH - j - 1] * kernel[i * kerH + j];
                    }
                }
            }
        }
        out[tid] = temp;
    }
}

// int32 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_int32( const int *__restrict__ inp,
                              const int inpW,
                              const int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int mode,
                              int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D2O<int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// int64 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<long int>.
// NOTE(review): `long int` is 64 bits wide only on LP64 platforms - confirm
// the supported targets before relying on the "_int64" name elsewhere.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_int64( const long int *__restrict__ inp,
                              const int inpW,
                              const long int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int mode,
                              long int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D2O<long int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// float32 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<float>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_float32( const float *__restrict__ inp,
                                const int inpW,
                                const float *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int mode,
                                float *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D2O<float>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// float64 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<double>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_float64( const double *__restrict__ inp,
                                const int inpW,
                                const double *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int mode,
                                double *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D2O<double>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// complex64 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<thrust::complex<float>>.
// inp and kernel are only read, so they are const-qualified like the other
// dtype entry points (the template takes them as const T * anyway).
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_complex64( const thrust::complex<float> *__restrict__ inp,
                                  const int inpW,
                                  const thrust::complex<float> *__restrict__ kernel,
                                  const int kerW,
                                  const int kerH,
                                  const int mode,
                                  thrust::complex<float> *__restrict__ out,
                                  const int outW ) {
    _cupy_convolve1D2O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// complex128 entry point for the second-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D2O<thrust::complex<double>>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_complex128( const thrust::complex<double> *__restrict__ inp,
                                   const int inpW,
                                   const thrust::complex<double> *__restrict__ kernel,
                                   const int kerW,
                                   const int kerH,
                                   const int mode,
                                   thrust::complex<double> *__restrict__ out,
                                   const int outW ) {
    _cupy_convolve1D2O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}



///////////////////////////////////////////////////////////////////////////////
// CONVOLVE 1D3O //
///////////////////////////////////////////////////////////////////////////////

// Third-order 1D convolution ("valid" mode only).
//
// For every output index tid in [0, outW) this computes
//
//   out[tid] = sum_{i < kerW} sum_{j < kerH} sum_{k < kerD}
//              inp[tid + kerW - 1 - i] * inp[tid + kerH - 1 - j] *
//              inp[tid + kerD - 1 - k] * kernel[(i * kerH + j) * kerD + k]
//
// Parameters
//   inp     input samples, inpW elements
//   inpW    number of elements in inp
//   kernel  dense third-order kernel, indexed as
//           kernel[(i * kerH + j) * kerD + k] with i in [0, kerW),
//           j in [0, kerH) and k in [0, kerD)
//   kerW    extent of the first kernel axis
//   kerH    extent of the second kernel axis
//   kerD    extent of the third kernel axis
//   mode    0 selects "valid"; any other value writes zeros
//   out     output buffer, outW elements
//   outW    number of elements to produce
//
// Work is distributed with a grid-stride loop, so any 1D launch configuration
// covers all outW outputs.
//
// An output whose sliding window would read past the end of inp is written as
// a value-initialised T (zero) instead of reading out of bounds.
template<typename T>
__device__ void _cupy_convolve1D3O( const T *__restrict__ inp,
                                    const int inpW,
                                    const T *__restrict__ kernel,
                                    const int kerW,
                                    const int kerH,
                                    const int kerD,
                                    const int mode,
                                    T *__restrict__ out,
                                    const int outW ) {

    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };

    // Largest offset (relative to tid) that the loops below read from inp:
    // max(kerW, kerH, kerD) - 1.
    int reach { kerW };
    if ( kerH > reach ) {
        reach = kerH;
    }
    if ( kerD > reach ) {
        reach = kerD;
    }
    reach -= 1;

    for ( int tid = tx; tid < outW; tid += stride ) {

        T temp {};

        if ( mode == 0 ) {  // Valid
            // The whole window [tid, tid + reach] must lie inside inp.
            if ( tid >= 0 && tid + reach < inpW ) {
                for ( int i = 0; i < kerW; i++ ) {
                    // Invariant across the j and k loops.
                    const T in_i = inp[tid + kerW - i - 1];
                    for ( int j = 0; j < kerH; j++ ) {
                        // Invariant across the k loop; keeps the original
                        // left-to-right multiplication order.
                        const T in_ij = in_i * inp[tid + kerH - j - 1];
                        for ( int k = 0; k < kerD; k++ ) {
                            // Flattened index uses the extents of the inner
                            // axes (kerH, kerD), not kerW.
                            temp += in_ij * inp[tid + kerD - k - 1] * kernel[( i * kerH + j ) * kerD + k];
                        }
                    }
                }
            }
        }
        out[tid] = temp;
    }
}

// int32 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_int32( const int *__restrict__ inp,
                              const int inpW,
                              const int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int kerD,
                              const int mode,
                              int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D3O<int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// int64 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<long int>.
// NOTE(review): `long int` is 64 bits wide only on LP64 platforms - confirm
// the supported targets before relying on the "_int64" name elsewhere.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_int64( const long int *__restrict__ inp,
                              const int inpW,
                              const long int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int kerD,
                              const int mode,
                              long int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D3O<long int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// float32 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<float>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_float32( const float *__restrict__ inp,
                                const int inpW,
                                const float *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int kerD,
                                const int mode,
                                float *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D3O<float>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// float64 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<double>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_float64( const double *__restrict__ inp,
                                const int inpW,
                                const double *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int kerD,
                                const int mode,
                                double *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D3O<double>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// complex64 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<thrust::complex<float>>.
// inp and kernel are only read, so they are const-qualified like the other
// dtype entry points (the template takes them as const T * anyway).
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_complex64( const thrust::complex<float> *__restrict__ inp,
                                  const int inpW,
                                  const thrust::complex<float> *__restrict__ kernel,
                                  const int kerW,
                                  const int kerH,
                                  const int kerD,
                                  const int mode,
                                  thrust::complex<float> *__restrict__ out,
                                  const int outW ) {
    _cupy_convolve1D3O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// complex128 entry point for the third-order 1D convolution.
// Launchable with at most 512 threads per block; forwards every argument
// unchanged to _cupy_convolve1D3O<thrust::complex<double>>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_complex128( const thrust::complex<double> *__restrict__ inp,
                                   const int inpW,
                                   const thrust::complex<double> *__restrict__ kernel,
                                   const int kerW,
                                   const int kerH,
                                   const int kerD,
                                   const int mode,
                                   thrust::complex<double> *__restrict__ out,
                                   const int outW ) {
    _cupy_convolve1D3O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}
2 changes: 2 additions & 0 deletions python/cusignal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
choose_conv_method,
convolve,
convolve2d,
convolve1d2o,
convolve1d3o,
)
from cusignal.filter_design.fir_filter_design import (
kaiser_beta,
Expand Down
2 changes: 2 additions & 0 deletions python/cusignal/convolution/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@
fftconvolve,
convolve2d,
choose_conv_method,
convolve1d2o,
convolve1d3o,
)
from cusignal.convolution.correlate import correlate, correlate2d
Loading

0 comments on commit 53bdc01

Please sign in to comment.