Skip to content
This repository has been archived by the owner on Sep 25, 2023. It is now read-only.

[RELEASE] cusignal v0.17 #291

Merged
merged 41 commits into from
Dec 10, 2020
Merged
Changes from 7 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
b5b1c10
DOC v0.17 Updates
raydouglass Oct 2, 2020
50343a4
Merge pull request #255 from rapidsai/branch-0.16
GPUtester Oct 5, 2020
85a4404
Merge pull request #257 from rapidsai/branch-0.16
GPUtester Oct 6, 2020
0d478f9
Merge pull request #261 from rapidsai/branch-0.16
GPUtester Oct 8, 2020
4cb88ac
Merge pull request #262 from rapidsai/branch-0.16
GPUtester Oct 8, 2020
8a41b30
Merge pull request #265 from rapidsai/branch-0.16
GPUtester Oct 9, 2020
26b6584
Merge pull request #266 from rapidsai/branch-0.16
GPUtester Oct 9, 2020
d54e18c
[REVIEW] Add inverse_complex_cepstrum and minimum_phase to acoustics …
z-ryan1 Oct 13, 2020
30f2fa9
[REVIEW] Various optimization across all functions (#267)
mnicely Oct 16, 2020
2cca8c0
Implemented convolve{1d2o,1d3o} mode='valid', shape={square,cube}, ne…
randompast Oct 19, 2020
e813ad8
[REVIEW] ENH Increase robustness for testing (#271)
mnicely Oct 19, 2020
84d991c
[REVIEW] BUG Fix gausspulse (False, False) (#272)
mnicely Oct 19, 2020
58d6c49
Initial push
mnicely Oct 21, 2020
e87a3a9
Update CHANGELOG.md
mnicely Oct 21, 2020
a545fbc
ENH Improve gpuCI scripts
dillon-cullinan Oct 22, 2020
5505862
Add nightly benchmarks, add CC 8.6, and gpuci_logger
mnicely Oct 23, 2020
b70d52b
Code removal per AJ's comments
mnicely Oct 23, 2020
509942b
Added benchmarks dir
mnicely Oct 23, 2020
87cdfea
changed changedlog.md
randompast Oct 24, 2020
b8869d3
improved #270 changelog.md message
randompast Oct 24, 2020
49e39d6
DOC Update changelog
dillon-cullinan Oct 26, 2020
73fda1e
FIX Remove unnecessary conda install
dillon-cullinan Oct 26, 2020
bbd4440
Update ci/docs/build.sh
ajschmidt8 Oct 28, 2020
94d8b94
Update CHANGELOG.md (#277)
BradReesWork Oct 28, 2020
ed24aba
Merge remote-tracking branch 'upstream/branch-0.16' into branch-0.17-…
rlratzel Oct 29, 2020
1e174f0
Merge pull request #278 from rlratzel/branch-0.17-merge-0.16
BradReesWork Oct 29, 2020
acbd416
Merge pull request #275 from dillon-cullinan/enh-gpuci
dillon-cullinan Nov 6, 2020
9e59dd9
Update readme to reflect new version numbers
awthomp Nov 9, 2020
b5f0548
Update conda upload script (#281)
ajschmidt8 Nov 10, 2020
cf169b7
Merge branch 'branch-0.17' into readme_update
BradReesWork Nov 11, 2020
a3e5293
Merge pull request #282 from awthomp/readme_update
BradReesWork Nov 11, 2020
968c9de
Merge branch-0.17
mnicely Nov 11, 2020
efe63ab
style fix
BradReesWork Nov 18, 2020
ef3756d
style
BradReesWork Nov 18, 2020
53bdc01
Merge pull request #270 from randompast/convolve_1d_2o3o
BradReesWork Nov 18, 2020
34a6d1f
Merge branch 'branch-0.17' into add_nightly_benchmarks
BradReesWork Nov 18, 2020
83b6eee
Merge pull request #274 from mnicely/add_nightly_benchmarks
BradReesWork Nov 18, 2020
dbe50a4
[skip-ci] Add `-y` flag to conda install in E2E_Example Notebook (#286)
ajschmidt8 Nov 24, 2020
d6175e1
Gpuciscripts clean and update (#283)
msadang Dec 4, 2020
04de176
[REVIEW] Fixing issue when reading sigmf data from parent folder (#280)
drabastomek Dec 5, 2020
44e57f5
Update CHANGELOG.md
raydouglass Dec 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

## New Features
- PR #241 - Add inverse_complex_cepstrum and minimum_phase to acoustics module
- PR #270 - Add second and third order convolutions as convolve1d2o and convolve1d3o
- PR #274 - Add nightly benchmarks to CI

## Improvements
218 changes: 218 additions & 0 deletions cpp/src/convolution/_convolution.cu
Original file line number Diff line number Diff line change
@@ -576,3 +576,221 @@ extern "C" __global__ void __launch_bounds__( 256 )
const int pick ) {
_cupy_correlate2D<thrust::complex<double>>( inp, inpW, inpH, kernel, kerW, kerH, S0, S1, out, outW, outH, pick );
}


///////////////////////////////////////////////////////////////////////////////
// CONVOLVE 1D2O //
///////////////////////////////////////////////////////////////////////////////

/**
 * Second-order 1D convolution of `inp` against a 2D `kernel`.
 *
 * For each output index tid in [0, outW) and mode == 0 ("valid"):
 *
 *   out[tid] = sum_{i < kerW} sum_{j < kerH}
 *              inp[tid + kerW - 1 - i] * inp[tid + kerH - 1 - j] * kernel[i * kerH + j]
 *
 * Any other `mode` value writes a value-initialised T to out[tid].
 *
 * @param inp     input samples, inpW elements
 * @param inpW    number of elements in `inp`
 * @param kernel  kerW * kerH coefficients; indexed as kerW rows of kerH
 *                contiguous elements (assumes a C-ordered array — confirm
 *                against the launching code)
 * @param kerW    extent of the first kernel axis
 * @param kerH    extent of the second kernel axis
 * @param mode    0 selects the "valid" computation
 * @param out     output buffer, outW elements
 * @param outW    number of elements to produce
 *
 * Output positions whose input window would extend past `inp` are written as
 * a value-initialised T instead of reading out of bounds.
 */
template<typename T>
__device__ void _cupy_convolve1D2O( const T *__restrict__ inp,
                                    const int inpW,
                                    const T *__restrict__ kernel,
                                    const int kerW,
                                    const int kerH,
                                    const int mode,
                                    T *__restrict__ out,
                                    const int outW ) {

    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };

    // Grid-stride loop: each thread handles tid, tid + stride, ...
    for ( int tid = tx; tid < outW; tid += stride ) {

        T temp {};

        // The highest input indices touched are tid + kerW - 1 and
        // tid + kerH - 1; both must lie inside inp.
        const bool window_fits = ( tid + kerW <= inpW ) && ( tid + kerH <= inpW );

        if ( mode == 0 && window_fits ) {  // Valid
            for ( int i = 0; i < kerW; i++ ) {
                // Invariant across the inner loop.
                const T inp_i = inp[tid + kerW - i - 1];
                for ( int j = 0; j < kerH; j++ ) {
                    // Row length is kerH, so element (i, j) sits at i * kerH + j.
                    // Identical to the previous kerW * i + j for square kernels,
                    // and stays within kerW * kerH elements for any shape.
                    temp += inp_i * inp[tid + kerH - j - 1] * kernel[kerH * i + j];
                }
            }
        }
        out[tid] = temp;
    }
}

// int32 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D2O<int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_int32( const int *__restrict__ inp,
                              const int inpW,
                              const int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int mode,
                              int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D2O<int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// int64 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D2O<long int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_int64( const long int *__restrict__ inp,
                              const int inpW,
                              const long int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int mode,
                              long int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D2O<long int>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// float32 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D2O<float>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_float32( const float *__restrict__ inp,
                                const int inpW,
                                const float *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int mode,
                                float *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D2O<float>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// float64 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D2O<double>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_float64( const double *__restrict__ inp,
                                const int inpW,
                                const double *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int mode,
                                double *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D2O<double>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// complex64 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to
// _cupy_convolve1D2O<thrust::complex<float>>.
//
// `inp` and `kernel` are only read, so they are const-qualified here, matching
// the device template's signature and the other dtype entry points in this
// file. Pointer const-ness does not alter the extern "C" symbol or how the
// kernel is launched.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_complex64( const thrust::complex<float> *__restrict__ inp,
                                  const int inpW,
                                  const thrust::complex<float> *__restrict__ kernel,
                                  const int kerW,
                                  const int kerH,
                                  const int mode,
                                  thrust::complex<float> *__restrict__ out,
                                  const int outW ) {
    _cupy_convolve1D2O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}

// complex128 entry point for the second-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to
// _cupy_convolve1D2O<thrust::complex<double>>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D2O_complex128( const thrust::complex<double> *__restrict__ inp,
                                   const int inpW,
                                   const thrust::complex<double> *__restrict__ kernel,
                                   const int kerW,
                                   const int kerH,
                                   const int mode,
                                   thrust::complex<double> *__restrict__ out,
                                   const int outW ) {
    _cupy_convolve1D2O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, mode, out, outW );
}



///////////////////////////////////////////////////////////////////////////////
// CONVOLVE 1D3O //
///////////////////////////////////////////////////////////////////////////////

/**
 * Third-order 1D convolution of `inp` against a 3D `kernel`.
 *
 * For each output index tid in [0, outW) and mode == 0 ("valid"):
 *
 *   out[tid] = sum_{i < kerW} sum_{j < kerH} sum_{k < kerD}
 *              inp[tid + kerW - 1 - i] * inp[tid + kerH - 1 - j] * inp[tid + kerD - 1 - k]
 *              * kernel[(i * kerH + j) * kerD + k]
 *
 * Any other `mode` value writes a value-initialised T to out[tid].
 *
 * @param inp     input samples, inpW elements
 * @param inpW    number of elements in `inp`
 * @param kernel  kerW * kerH * kerD coefficients; indexed with the last axis
 *                contiguous (assumes a C-ordered array — confirm against the
 *                launching code)
 * @param kerW    extent of the first kernel axis
 * @param kerH    extent of the second kernel axis
 * @param kerD    extent of the third kernel axis
 * @param mode    0 selects the "valid" computation
 * @param out     output buffer, outW elements
 * @param outW    number of elements to produce
 *
 * Output positions whose input window would extend past `inp` are written as
 * a value-initialised T instead of reading out of bounds.
 */
template<typename T>
__device__ void _cupy_convolve1D3O( const T *__restrict__ inp,
                                    const int inpW,
                                    const T *__restrict__ kernel,
                                    const int kerW,
                                    const int kerH,
                                    const int kerD,
                                    const int mode,
                                    T *__restrict__ out,
                                    const int outW ) {

    const int tx { static_cast<int>( blockIdx.x * blockDim.x + threadIdx.x ) };
    const int stride { static_cast<int>( blockDim.x * gridDim.x ) };

    // Grid-stride loop: each thread handles tid, tid + stride, ...
    for ( int tid = tx; tid < outW; tid += stride ) {

        T temp {};

        // The highest input indices touched are tid + kerW - 1, tid + kerH - 1
        // and tid + kerD - 1; all three must lie inside inp.
        const bool window_fits =
            ( tid + kerW <= inpW ) && ( tid + kerH <= inpW ) && ( tid + kerD <= inpW );

        if ( mode == 0 && window_fits ) {  // Valid
            for ( int i = 0; i < kerW; i++ ) {
                // Invariant across both inner loops.
                const T inp_i = inp[tid + kerW - i - 1];
                for ( int j = 0; j < kerH; j++ ) {
                    // Same left-to-right product order as a * b * c * d.
                    const T inp_ij = inp_i * inp[tid + kerH - j - 1];
                    for ( int k = 0; k < kerD; k++ ) {
                        // Element (i, j, k) sits at (i * kerH + j) * kerD + k.
                        // Identical to the previous (kerW * i + j) * kerH + k for
                        // cubic kernels, and stays within kerW * kerH * kerD
                        // elements for any shape.
                        temp += inp_ij * inp[tid + kerD - k - 1] * kernel[( kerH * i + j ) * kerD + k];
                    }
                }
            }
        }
        out[tid] = temp;
    }
}

// int32 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D3O<int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_int32( const int *__restrict__ inp,
                              const int inpW,
                              const int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int kerD,
                              const int mode,
                              int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D3O<int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// int64 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D3O<long int>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_int64( const long int *__restrict__ inp,
                              const int inpW,
                              const long int *__restrict__ kernel,
                              const int kerW,
                              const int kerH,
                              const int kerD,
                              const int mode,
                              long int *__restrict__ out,
                              const int outW ) {
    _cupy_convolve1D3O<long int>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// float32 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D3O<float>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_float32( const float *__restrict__ inp,
                                const int inpW,
                                const float *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int kerD,
                                const int mode,
                                float *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D3O<float>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// float64 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to _cupy_convolve1D3O<double>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_float64( const double *__restrict__ inp,
                                const int inpW,
                                const double *__restrict__ kernel,
                                const int kerW,
                                const int kerH,
                                const int kerD,
                                const int mode,
                                double *__restrict__ out,
                                const int outW ) {
    _cupy_convolve1D3O<double>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// complex64 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to
// _cupy_convolve1D3O<thrust::complex<float>>.
//
// `inp` and `kernel` are only read, so they are const-qualified here, matching
// the device template's signature and the other dtype entry points in this
// file. Pointer const-ness does not alter the extern "C" symbol or how the
// kernel is launched.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_complex64( const thrust::complex<float> *__restrict__ inp,
                                  const int inpW,
                                  const thrust::complex<float> *__restrict__ kernel,
                                  const int kerW,
                                  const int kerH,
                                  const int kerD,
                                  const int mode,
                                  thrust::complex<float> *__restrict__ out,
                                  const int outW ) {
    _cupy_convolve1D3O<thrust::complex<float>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}

// complex128 entry point for the third-order 1D convolution.
// Unmangled (extern "C") kernel, compiled for at most 512 threads per block;
// every argument is forwarded unchanged to
// _cupy_convolve1D3O<thrust::complex<double>>.
extern "C" __global__ void __launch_bounds__( 512 )
    _cupy_convolve1D3O_complex128( const thrust::complex<double> *__restrict__ inp,
                                   const int inpW,
                                   const thrust::complex<double> *__restrict__ kernel,
                                   const int kerW,
                                   const int kerH,
                                   const int kerD,
                                   const int mode,
                                   thrust::complex<double> *__restrict__ out,
                                   const int outW ) {
    _cupy_convolve1D3O<thrust::complex<double>>( inp, inpW, kernel, kerW, kerH, kerD, mode, out, outW );
}
2 changes: 2 additions & 0 deletions python/cusignal/__init__.py
Original file line number Diff line number Diff line change
@@ -40,6 +40,8 @@
choose_conv_method,
convolve,
convolve2d,
convolve1d2o,
convolve1d3o,
)
from cusignal.filter_design.fir_filter_design import (
kaiser_beta,
2 changes: 2 additions & 0 deletions python/cusignal/convolution/__init__.py
Original file line number Diff line number Diff line change
@@ -16,5 +16,7 @@
fftconvolve,
convolve2d,
choose_conv_method,
convolve1d2o,
convolve1d3o,
)
from cusignal.convolution.correlate import correlate, correlate2d
Loading