-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathcuspreadinterp.h
340 lines (315 loc) · 15.2 KB
/
cuspreadinterp.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#ifndef __CUSPREADINTERP_H__
#define __CUSPREADINTERP_H__
#include <cufinufft_eitherprec.h>
static __forceinline__ __device__
FLT evaluate_kernel(FLT x, FLT es_c, FLT es_beta, int ns)
/* ES ("exp sqrt") kernel evaluation at single real argument:
phi(x) = exp(beta.sqrt(1 - (2x/n_s)^2)), for |x| < nspread/2
related to an asymptotic approximation to the Kaiser--Bessel, itself an
approximation to prolate spheroidal wavefunction (PSWF) of order 0.
This is the "reference implementation", used by eg common/onedim_*
2/17/17 */
{
return abs(x) < ns/2.0 ? exp(es_beta * (sqrt(1.0 - es_c*x*x))) : 0.0;
}
static __inline__ __device__
void eval_kernel_vec_Horner(FLT *ker, const FLT x, const int w,
const double upsampfac)
/* Fill ker[] with Horner piecewise poly approx to [-w/2,w/2] ES kernel eval at
x_j = x + j, for j=0,..,w-1. Thus x in [-w/2,-w/2+1]. w is aka ns.
This is the current evaluation method, since it's faster (except i7 w=16).
Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
{
FLT z = 2*x + w - 1.0; // scale so local grid offset z in [-1,1]
// insert the auto-generated code which expects z, w args, writes to ker...
if (upsampfac==2.0) { // floating point equality is fine here
#include "../contrib/ker_horner_allw_loop.c"
}
}
static __inline__ __device__
void eval_kernel_vec(FLT *ker, const FLT x, const double w, const double es_c,
const double es_beta)
{
for(int i=0; i<w; i++){
ker[i] = evaluate_kernel(abs(x+i), es_c, es_beta, w);
}
}
//Kernels for 1D codes
/* -----------------------------Spreading Kernels-----------------------------*/
/* Kernels for NUptsdriven Method */
__global__
void Spread_1d_NUptsdriven(FLT *x, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, FLT es_c, FLT es_beta, int* idxnupts, int pirange);
__global__
void Spread_1d_NUptsdriven_Horner(FLT *x, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, FLT sigma, int* idxnupts, int pirange);
/* Kernels for SubProb Method */
// SubProb properties
__global__
void CalcBinSize_noghost_1d(int M, int nf1, int bin_size_x,
int nbinx, int* bin_size, FLT *x, int* sortidx, int pirange);
__global__
void CalcInvertofGlobalSortIdx_1d(int M, int bin_size_x, int nbinx,
int* bin_startpts, int* sortidx,FLT *x, int* index, int pirange, int nf1);
// Main Spreading Kernel
__global__
void Spread_1d_Subprob(FLT *x, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, FLT es_c, FLT es_beta, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int* idxnupts, int pirange);
__global__
void Spread_1d_Subprob_Horner(FLT *x, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int* idxnupts, int pirange);
/* ---------------------------Interpolation Kernels---------------------------*/
/* Kernels for NUptsdriven Method */
__global__
void Interp_1d_NUptsdriven(FLT *x, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf2, FLT es_c, FLT es_beta, int *idxnupts, int pirange);
__global__
void Interp_1d_NUptsdriven_Horner(FLT *x, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf2, FLT sigma, int *idxnupts, int pirange);
//Kernels for 2D codes
/* -----------------------------Spreading Kernels-----------------------------*/
/* Kernels for NUptsdriven Method */
__global__
void Spread_2d_NUptsdriven(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, int nf2, FLT es_c, FLT es_beta, int* idxnupts, int pirange);
__global__
void Spread_2d_NUptsdriven_Horner(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, FLT sigma, int* idxnupts, int pirange);
/* Kernels for SubProb Method */
// SubProb properties
__global__
void CalcBinSize_noghost_2d(int M, int nf1, int nf2, int bin_size_x,
int bin_size_y, int nbinx,int nbiny, int* bin_size, FLT *x, FLT *y,
int* sortidx, int pirange);
__global__
void CalcInvertofGlobalSortIdx_2d(int M, int bin_size_x, int bin_size_y,
int nbinx,int nbiny, int* bin_startpts, int* sortidx,FLT *x, FLT *y,
int* index, int pirange, int nf1, int nf2);
// Main Spreading Kernel
__global__
void Spread_2d_Subprob(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, int nf2, FLT es_c, FLT es_beta, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int nbiny,int* idxnupts, int pirange);
__global__
void Spread_2d_Subprob_Horner(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int nbiny,int* idxnupts, int pirange);
/* Kernels for Paul's Method */
__global__
void LocateFineGridPos_Paul(int M, int nf1, int nf2, int bin_size_x, int bin_size_y,
int nbinx, int nbiny, int* bin_size, int ns, FLT *x, FLT *y,
int* sortidx, int* finegridsize, int pirange);
__global__
void CalcInvertofGlobalSortIdx_Paul(int nf1, int nf2, int M, int bin_size_x,
int bin_size_y, int nbinx,int nbiny, int ns, FLT *x, FLT *y,
int* finegridstartpts, int* sortidx, int* index, int pirange);
__global__
void Spread_2d_Subprob_Paul(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, FLT es_c, FLT es_beta, FLT sigma,
int* binstartpts, int* bin_size, int bin_size_x, int bin_size_y,
int* subprob_to_bin, int* subprobstartpts, int* numsubprob,
int maxsubprobsize, int nbinx, int nbiny, int* idxnupts, int* fgstartpts,
int* finegridsize, int pirange);
/* ---------------------------Interpolation Kernels---------------------------*/
/* Kernels for NUptsdriven Method */
__global__
void Interp_2d_NUptsdriven(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, int nf2, FLT es_c, FLT es_beta, int *idxnupts, int pirange);
__global__
void Interp_2d_NUptsdriven_Horner(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, FLT sigma, int *idxnupts, int pirange);
/* Kernels for Subprob Method */
__global__
void Interp_2d_Subprob(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M, const int ns,
int nf1, int nf2, FLT es_c, FLT es_beta, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int nbiny, int* idxnupts, int pirange);
__global__
void Interp_2d_Subprob_Horner(FLT *x, FLT *y, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int* subprob_to_bin,
int* subprobstartpts, int* numsubprob, int maxsubprobsize, int nbinx,
int nbiny, int* idxnupts, int pirange);
//Kernels for 3D codes
/* -----------------------------Spreading Kernels-----------------------------*/
/* Kernels for Bin Sort NUpts */
__global__
void CalcBinSize_noghost_3d(int M, int nf1, int nf2, int nf3, int bin_size_x,
int bin_size_y, int bin_size_z, int nbinx, int nbiny, int nbinz,
int* bin_size, FLT *x, FLT *y, FLT *z, int* sortidx, int pirange);
__global__
void CalcInvertofGlobalSortIdx_3d(int M, int bin_size_x, int bin_size_y,
int bin_size_z, int nbinx, int nbiny, int nbinz, int* bin_startpts,
int* sortidx, FLT *x, FLT *y, FLT *z, int* index, int pirange, int nf1,
int nf2, int nf3);
__global__
void TrivialGlobalSortIdx_3d(int M, int* index);
/* Kernels for NUptsdriven Method */
__global__
void Spread_3d_NUptsdriven_Horner(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw,
int M, const int ns, int nf1, int nf2, int nf3, FLT sigma, int* idxnupts,
int pirange);
__global__
void Spread_3d_NUptsdriven(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta,
int* idxnupts, int pirange);
/* Kernels for Subprob Method */
__global__
void CalcSubProb_3d_v2(int* bin_size, int* num_subprob, int maxsubprobsize,
int numbins);
__global__
void MapBintoSubProb_3d_v2(int* d_subprob_to_bin,int* d_subprobstartpts,
int* d_numsubprob,int numbins);
__global__
void Spread_3d_Subprob_Horner(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw,
int M, const int ns, int nf1, int nf2, int nf3, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int bin_size_z,
int* subprob_to_bin, int* subprobstartpts, int* numsubprob,
int maxsubprobsize, int nbinx, int nbiny, int nbinz, int* idxnupts,
int pirange);
__global__
void Spread_3d_Subprob(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta,
int* binstartpts,int* bin_size, int bin_size_x, int bin_size_y,
int bin_size_z, int* subprob_to_bin, int* subprobstartpts, int* numsubprob,
int maxsubprobsize, int nbinx, int nbiny, int nbinz, int* idxnupts,
int pirange);
/* Kernels for Block BlockGather Method */
__global__
void LocateNUptstoBins_ghost(int M, int bin_size_x,
int bin_size_y, int bin_size_z, int nbinx, int nbiny, int nbinz,
int binsperobinx, int binsperobiny, int binsperobinz, int* bin_size,
FLT *x, FLT *y, FLT *z, int* sortidx, int pirange, int nf1, int nf2,
int nf3);
__global__
void Temp(int binsperobinx, int binsperobiny, int binsperobinz,
int nbinx, int nbiny, int nbinz, int* binsize);
__global__
void FillGhostBins(int binsperobinx, int binsperobiny, int binsperobinz,
int nbinx, int nbiny, int nbinz, int* binsize);
__global__
void CalcInvertofGlobalSortIdx_ghost(int M, int bin_size_x,
int bin_size_y, int bin_size_z, int nbinx, int nbiny, int nbinz,
int binsperobinx, int binsperobiny, int binsperobinz, int* bin_startpts,
int* sortidx, FLT *x, FLT *y, FLT *z, int* index, int pirange,
int nf1, int nf2, int nf3);
__global__
void GhostBinPtsIdx(int binsperobinx, int binsperobiny, int binsperobinz,
int nbinx, int nbiny, int nbinz, int* binsize, int* index,
int* bin_startpts, int M);
__global__
void CalcSubProb_3d_v1(int binsperobinx, int binsperobiny, int binsperobinz,
int* bin_size, int* num_subprob, int maxsubprobsize, int numbins);
__global__
void MapBintoSubProb_3d_v1(int* d_subprob_to_obin, int* d_subprobstartpts,
int* d_numsubprob,int numbins);
__global__
void Spread_3d_BlockGather(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta, FLT sigma,
int* binstartpts, int obin_size_x, int obin_size_y, int obin_size_z,
int binsperobin, int* subprob_to_bin, int* subprobstartpts,
int maxsubprobsize, int nobinx, int nobiny, int nobinz, int* idxnupts,
int pirange);
__global__
void Spread_3d_BlockGather_Horner(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta, FLT sigma,
int* binstartpts, int obin_size_x, int obin_size_y, int obin_size_z,
int binsperobin, int* subprob_to_bin, int* subprobstartpts,
int maxsubprobsize, int nobinx, int nobiny, int nobinz, int* idxnupts,
int pirange);
/* -----------------------------Spreading Kernels-----------------------------*/
/* Kernels for NUptsdriven Method */
__global__
void Interp_3d_NUptsdriven_Horner(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw,
int M, const int ns, int nf1, int nf2, int nf3, FLT sigma, int* idxnupts,
int pirange);
__global__
void Interp_3d_NUptsdriven(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta,
int* idxnupts, int pirange);
/* Kernels for Subprob Method */
__global__
void Interp_3d_Subprob_Horner(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw, int M,
const int ns, int nf1, int nf2, int nf3, FLT sigma, int* binstartpts,
int* bin_size, int bin_size_x, int bin_size_y, int bin_size_z,
int* subprob_to_bin, int* subprobstartpts, int* numsubprob,
int maxsubprobsize, int nbinx, int nbiny, int nbinz, int* idxnupts,
int pirange);
__global__
void Interp_3d_Subprob(FLT *x, FLT *y, FLT *z, CUCPX *c, CUCPX *fw,
int M, const int ns, int nf1, int nf2, int nf3, FLT es_c, FLT es_beta,
int* binstartpts, int* bin_size, int bin_size_x, int bin_size_y,
int bin_size_z, int* subprob_to_bin, int* subprobstartpts, int* numsubprob,
int maxsubprobsize, int nbinx, int nbiny, int nbinz, int* idxnupts,
int pirange);
/* C wrapper for calling CUDA kernels */
// Wrapper for testing spread, interpolation only
int CUFINUFFT_SPREAD1D(int nf1, CUCPX* d_fw, int M,
FLT *d_kx, CUCPX* d_c, CUFINUFFT_PLAN d_plan);
int CUFINUFFT_INTERP1D(int nf1, CUCPX* d_fw, int M,
FLT *d_kx, CUCPX* d_c, CUFINUFFT_PLAN d_plan);
int CUFINUFFT_SPREAD2D(int nf1, int nf2, CUCPX* d_fw, int M,
FLT *d_kx, FLT *d_ky, CUCPX* d_c, CUFINUFFT_PLAN d_plan);
int CUFINUFFT_INTERP2D(int nf1, int nf2, CUCPX* d_fw, int M,
FLT *d_kx, FLT *d_ky, CUCPX* d_c, CUFINUFFT_PLAN d_plan);
int CUFINUFFT_SPREAD3D(int nf1, int nf2, int nf3,
CUCPX* d_fw, int M, FLT *d_kx, FLT *d_ky, FLT* d_kz,
CUCPX* d_c, CUFINUFFT_PLAN dplan);
int CUFINUFFT_INTERP3D(int nf1, int nf2, int nf3,
CUCPX* d_fw, int M, FLT *d_kx, FLT *d_ky, FLT *d_kz,
CUCPX* d_c, CUFINUFFT_PLAN dplan);
// Functions for calling different methods of spreading & interpolation
int CUSPREAD1D(CUFINUFFT_PLAN d_plan, int blksize);
int CUINTERP1D(CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD2D(CUFINUFFT_PLAN d_plan, int blksize);
int CUINTERP2D(CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD3D(CUFINUFFT_PLAN d_plan, int blksize);
int CUINTERP3D(CUFINUFFT_PLAN d_plan, int blksize);
// Wrappers for methods of spreading
int CUSPREAD1D_NUPTSDRIVEN_PROP(int nf1, int M, CUFINUFFT_PLAN d_plan);
int CUSPREAD1D_NUPTSDRIVEN(int nf1, int M, CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD1D_SUBPROB_PROP(int nf1, int M, CUFINUFFT_PLAN d_plan);
int CUSPREAD1D_SUBPROB(int nf1, int M, CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD2D_NUPTSDRIVEN_PROP(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan);
int CUSPREAD2D_NUPTSDRIVEN(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan,
int blksize);
int CUSPREAD2D_SUBPROB_PROP(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan);
int CUSPREAD2D_PAUL_PROP(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan);
int CUSPREAD2D_SUBPROB(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan,
int blksize);
int CUSPREAD2D_PAUL(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan,
int blksize);
int CUSPREAD3D_NUPTSDRIVEN_PROP(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan);
int CUSPREAD3D_NUPTSDRIVEN(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD3D_BLOCKGATHER_PROP(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan);
int CUSPREAD3D_BLOCKGATHER(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan, int blksize);
int CUSPREAD3D_SUBPROB_PROP(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan);
int CUSPREAD3D_SUBPROB(int nf1, int nf2, int nf3, int M, CUFINUFFT_PLAN d_plan,
int blksize);
// Wrappers for methods of interpolation
int CUINTERP1D_NUPTSDRIVEN(int nf1, int M, CUFINUFFT_PLAN d_plan, int blksize);
int CUINTERP2D_NUPTSDRIVEN(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan,
int blksize);
int CUINTERP2D_SUBPROB(int nf1, int nf2, int M, CUFINUFFT_PLAN d_plan,
int blksize);
int CUINTERP3D_NUPTSDRIVEN(int nf1, int nf2, int nf3, int M,
CUFINUFFT_PLAN d_plan, int blksize);
int CUINTERP3D_SUBPROB(int nf1, int nf2, int nf3, int M, CUFINUFFT_PLAN d_plan,
int blksize);
#endif