-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCudaTopExcl.cu
124 lines (106 loc) · 3.16 KB
/
CudaTopExcl.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <iostream>
#include <cuda.h>
#include "cuda_utils.h"
#include "CudaTopExcl.h"
//
// Class creator
//
CudaTopExcl::CudaTopExcl(const int ncoord, const int *iblo14, const int *inb14) : ncoord(ncoord) {
atomExclPosLen = 0;
atomExclPos = NULL;
atomExclLen = 0;
atomExcl = NULL;
setup(iblo14, inb14);
allocate<int>(&glo2loc, ncoord);
set_gpu_array<int>(glo2loc, ncoord, -1);
}
//
// Class destructor
//
CudaTopExcl::~CudaTopExcl() {
if (atomExclPos != NULL) deallocate<int>(&atomExclPos);
if (atomExcl != NULL) deallocate<int>(&atomExcl);
deallocate<int>(&glo2loc);
}
//
// Setups topological exclusions from data structure used in CHARMM
//
void CudaTopExcl::setup(const int *iblo14, const int *inb14) {
int *nexcl = new int[ncoord];
for (int i=0;i < ncoord;i++) nexcl[i] = 0;
// Count the number of exclusions to nexcl[0 ... ncoord-1]
for (int i=0;i < ncoord;i++) {
int excl_start;
if (i > 0) {
excl_start = iblo14[i-1];
} else {
excl_start = 0;
}
int excl_end = iblo14[i] - 1;
// add i-j exclusions to atom i
nexcl[i] += excl_end - excl_start + 1;
for (int excl_i=excl_start; excl_i <= excl_end;excl_i++) {
int j = abs(inb14[excl_i]) - 1;
// add i-j exclusion to atom j
nexcl[j]++;
}
}
// Find out maximum number of atom-atom exclusions per atom
maxNumExcl = 0;
for (int i=0;i < ncoord;i++) maxNumExcl = max(maxNumExcl, nexcl[i]);
int *h_atomExclPos = new int[ncoord+1];
// Use exclusive cumulative sum to calculate positions
int nexcl_tot = 0;
for (int i=0;i < ncoord;i++) {
h_atomExclPos[i] = nexcl_tot;
nexcl_tot += nexcl[i];
}
h_atomExclPos[ncoord] = nexcl_tot;
int *h_atomExcl = new int[nexcl_tot];
for (int i=0;i < ncoord;i++) nexcl[i] = 0;
for (int i=0;i < ncoord;i++) {
int excl_start;
if (i > 0) {
excl_start = iblo14[i-1];
} else {
excl_start = 0;
}
int excl_end = iblo14[i] - 1;
int pos_starti = h_atomExclPos[i];
int ni = nexcl[i];
for (int excl_i=excl_start;excl_i <= excl_end;excl_i++) {
int j = abs(inb14[excl_i]) - 1;
// Add i-j exclusion to atom j
int pos_startj = h_atomExclPos[j];
int nj = nexcl[j];
if (pos_startj + nj >= h_atomExclPos[j+1]) {
std::cerr << "CudaTopExcl::setup, overflow in j" << std::endl;
exit(1);
}
h_atomExcl[pos_startj + nj] = i;
nj++;
nexcl[j] = nj;
// Add i-j exclusion to atom i
if (pos_starti + ni >= h_atomExclPos[i+1]) {
std::cerr << "CudaTopExcl::setup, overflow in i" << std::endl;
exit(1);
}
h_atomExcl[pos_starti + ni] = j;
ni++;
}
nexcl[i] = ni;
}
// Allocate GPU memory and copy results to GPU
#ifdef STRICT_MEMORY_REALLOC
reallocate<int>(&atomExclPos, &atomExclPosLen, ncoord+1, 1.0f);
reallocate<int>(&atomExcl, &atomExclLen, nexcl_tot, 1.0f);
#else
reallocate<int>(&atomExclPos, &atomExclPosLen, ncoord+1, 1.1f);
reallocate<int>(&atomExcl, &atomExclLen, nexcl_tot, 1.1f);
#endif
copy_HtoD_sync<int>(h_atomExclPos, atomExclPos, ncoord+1);
copy_HtoD_sync<int>(h_atomExcl, atomExcl, nexcl_tot);
delete [] h_atomExcl;
delete [] h_atomExclPos;
delete [] nexcl;
}