-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCudaPMERecip.h
207 lines (159 loc) · 4.65 KB
/
CudaPMERecip.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#ifndef CUDAPMERECIP_H
#define CUDAPMERECIP_H
#include <cuda.h>
#ifndef USE_FBFFT
#include <cufft.h>
#endif
#if CUDA_VERSION >= 6000
#include <cufftXt.h>
#endif
#include <string>
#include "Bspline.h"
#include "Matrix3d.h"
#include "CudaEnergyVirial.h"
// CCELEC is 1/ (4 pi eps ) in AKMA units, conversion from SI
// units: CCELEC = e*e*Na / (4*pi*eps*1Kcal*1A)
//
// parameter :: CCELEC=332.0636D0 ! old value of dubious origin
// parameter :: CCELEC=331.843D0 ! value from 1986-1987 CRC Handbook
// ! of Chemistry and Physics
// real(chm_real), parameter :: &
// CCELEC_amber = 332.0522173D0, &
// CCELEC_charmm = 332.0716D0 , &
// CCELEC_discover = 332.054D0 , &
// CCELEC_namd = 332.0636D0
// CHARMM value of CCELEC (see CCELEC_charmm in the table above)
const double ccelec = 332.0716;
// 0.5*CCELEC, pre-computed for energy expressions
const double half_ccelec = 0.5*ccelec;
// FFT decomposition strategy used by CudaPMERecip:
//   COLUMN = separate 1D passes per axis (x r2c, y c2c, z c2c)
//   SLAB   = 2D xy transforms plus a 1D z pass
//   BOX    = single full 3D transform
// (inferred from the per-strategy plan/grid members of CudaPMERecip --
//  confirm against the .cu implementation)
enum FFTtype {COLUMN, SLAB, BOX};
//
// CudaPMERecip<AT, CT, CT2> declares the GPU reciprocal-space part of
// Particle Mesh Ewald (PME): spreading atomic charges onto a 3D grid
// (spread_charge), forward FFT, the k-space scalar sum with optional
// energy/virial accumulation (scalar_sum), inverse FFT, force gathering
// (gather_force), and the Ewald self-energy term (calc_self_energy).
// Implementation lives in the corresponding .cu file.
//
// Template parameters:
// AT = Accumulation Type (element type of the charge accumulation grid)
// CT = Calculation Type (real)
// CT2 = Calculation Type (complex)
//
template <typename AT, typename CT, typename CT2>
class CudaPMERecip {
public:
// Grid data arrays. data1/data2 are the raw buffers; the Matrix3d views
// below alias one of them, as noted by the trailing "// data1"/"// data2"
// comments on each view.
CT *data1;
CT *data2;
// Allocated lengths of data1/data2 (presumably in CT elements -- confirm
// against the allocation code in the .cu file)
int data1_len;
int data2_len;
// Type of FFT decomposition in use (COLUMN, SLAB, or BOX)
FFTtype fft_type;
Matrix3d<AT> *accum_grid; // data1
Matrix3d<CT> *charge_grid; // data2
Matrix3d<CT> *solved_grid; // data2
// For COLUMN FFT
Matrix3d<CT2> *xfft_grid; // data2
Matrix3d<CT2> *yfft_grid; // data1
Matrix3d<CT2> *zfft_grid; // data2
// For SLAB FFT. Also uses "zfft_grid" from above
Matrix3d<CT2> *xyfft_grid; // data2
// For BOX FFT
Matrix3d<CT2> *fft_grid; // data2
private:
// Order of interpolation (set by the constructor, mutable via set_order)
int order;
// Size of the entire grid
int nfftx;
int nffty;
int nfftz;
// Region boundaries in real space (this node's portion of the grid,
// passed to init())
int x0, x1;
int y0, y1;
int z0, z1;
// Writing region in real space
int xlo, xhi;
int ylo, yhi;
int zlo, zhi;
// Writing region size in real space
int xsize;
int ysize;
int zsize;
// Total size of the data array
int data_size;
#ifdef USE_FBFFT
// Parameters for "COLUMN" FFT: batch count and transform length of each
// 1D pass when the FBFFT backend is used instead of cuFFT
int x_r2c_batchSize;
int x_r2c_nfft;
int y_c2c_batchSize;
int y_c2c_nfft;
int z_c2c_batchSize;
int z_c2c_nfft;
int x_c2r_batchSize;
int x_c2r_nfft;
#else
// Plans for "COLUMN" FFT (one cuFFT plan per 1D pass and direction)
cufftHandle x_r2c_plan;
cufftHandle y_c2c_plan;
cufftHandle z_c2c_plan;
cufftHandle x_c2r_plan;
// Plans for "SLAB" FFT. Also uses "z_c2c_plan" from above
cufftHandle xy_r2c_plan;
cufftHandle xy_c2r_plan;
// Plans for "BOX" FFT (single 3D transform)
cufftHandle r2c_plan;
cufftHandle c2r_plan;
#endif
// true for using multiple GPUs for the FFTs
bool multi_gpu;
#if CUDA_VERSION >= 6000
// data for multi-gpus: cufftXt descriptor plus host-side buffers
// (presumably staging for cross-GPU redistribution -- confirm in .cu)
cudaLibXtDesc *multi_data;
CT2 *host_data;
CT *host_tmp;
#endif
// Stream where all computation takes place
cudaStream_t stream;
// Prefactor arrays, one per grid dimension (filled by calc_prefac())
CT* prefac_x;
CT* prefac_y;
CT* prefac_z;
#ifdef USE_TEXTURE_OBJECTS
// Texture object bound to grid data via setup_grid_texture();
// gridTexObjActive records whether gridTexObj currently holds a valid object
bool gridTexObjActive;
cudaTextureObject_t gridTexObj;
#endif
// Energy terms: shared accumulator plus the names under which the
// reciprocal and self energies are registered (nameRecip/nameSelf
// arguments of the constructor)
CudaEnergyVirial& energyVirial;
std::string strRecip;
std::string strSelf;
#ifdef USE_NEW_SPREAD
// Scratch buffers for the alternative charge-spreading path
// (exact semantics defined in the .cu implementation)
int* coordIndGrid;
int coordIndOverflow_len;
int* coordIndOverflow;
#endif
// One-time setup of the region boundaries/sizes for this node
void init(int x0, int x1, int y0, int y1, int z0, int z1, int order,
bool y_land_locked, bool z_land_locked);
// Creates the FFT plans (or FBFFT parameters) matching fft_type
void make_fft_plans();
// Computes prefac_x/y/z
void calc_prefac();
public:
// nnode/mynode select this node's slice of the grid for multi-node runs;
// nameRecip/nameSelf name the two energy terms in energyVirial.
CudaPMERecip(int nfftx, int nffty, int nfftz, int order, FFTtype fft_type, int nnode, int mynode,
CudaEnergyVirial& energyVirial, const char* nameRecip, const char* nameSelf,
cudaStream_t stream=0);
~CudaPMERecip();
// Binds the grid texture object to the given device buffer
void setup_grid_texture(CT *data, const int data_len);
void print_info();
void set_stream(cudaStream_t stream);
// void spread_charge(const int ncoord, const Bspline<CT> &bspline);
// Spreads ncoord charges (xyzq) onto the charge grid; recip is the
// reciprocal-space lattice matrix
void spread_charge(const float4 *xyzq, const int ncoord, const double *recip);
// k-space scalar sum; optionally accumulates energy and virial into
// energyVirial
void scalar_sum(const double* recip, const double kappa,
const bool calc_energy, const bool calc_virial);
// Ewald self-energy term for ncoord atoms (Ewald parameter kappa)
void calc_self_energy(const float4 *xyzq, const int ncoord, const double kappa);
// void gather_force(const int ncoord, const double* recip, const Bspline<CT> &bspline,
// const int stride, CT* force);
// Gathers per-atom forces from the solved grid; FT is the force storage
// type, stride the per-component spacing in the force array
template <typename FT>
void gather_force(const float4 *xyzq, const int ncoord, const double* recip,
const int stride, FT* force);
// 1D FFT passes (COLUMN decomposition; z pass also used by SLAB)
void x_fft_r2c(CT2 *data);
void x_fft_c2r(CT2 *data);
void y_fft_c2c(CT2 *data, const int direction);
void z_fft_c2c(CT2 *data, const int direction);
// Full forward / inverse transforms (charge grid <-> k-space)
void r2c_fft();
void c2r_fft();
// Simple accessors
int get_nfftx() {return nfftx;}
int get_nffty() {return nffty;}
int get_nfftz() {return nfftz;}
int get_order() {return order;}
void set_order(int order);
};
#endif // CUDAPMERECIP_H