// CpuMatrix3d.h
#ifndef CPUMATRIX3D_H
#define CPUMATRIX3D_H
#include <iostream>
// Axis-order identifiers.
// NOTE(review): presumably these are the values accepted by the `order`
// argument of CpuMatrix3d::transpose() - confirm against the implementation.
enum {
  YZX = 0,
  ZXY = 1
};
// Pair of single-precision values (x, y) with component-wise multiply and
// exact (bitwise-value) equality comparison.
struct float2 {
  float x, y;

  // Default constructor: x and y are left uninitialized.
  float2() {}

  // Sets both components to v. Kept implicit so that existing call sites
  // passing a plain scalar where a float2 is expected keep compiling.
  float2(const float v) : x(v), y(v) {}

  // Component-wise in-place multiplication: x *= rhs.x, y *= rhs.y.
  // Returns *this to allow chaining.
  float2& operator*=(const float2& rhs) {
    this->x *= rhs.x;
    this->y *= rhs.y;
    return *this;
  }

  // True when both components compare equal with ==.
  // const-qualified so that const objects can be compared.
  bool operator==(const float2& rhs) const {
    return (this->x == rhs.x) && (this->y == rhs.y);
  }

  // Negation of operator==.
  bool operator!=(const float2& rhs) const {return !(*this == rhs);}
};
// Pair of double-precision values (x, y) with component-wise multiply and
// exact (bitwise-value) equality comparison.
struct double2 {
  double x, y;

  // Default constructor: x and y are left uninitialized.
  double2() {}

  // Sets both components to v. Kept implicit so that existing call sites
  // passing a plain scalar where a double2 is expected keep compiling.
  double2(const double v) : x(v), y(v) {}

  // Component-wise in-place multiplication: x *= rhs.x, y *= rhs.y.
  // Returns *this to allow chaining.
  double2& operator*=(const double2& rhs) {
    this->x *= rhs.x;
    this->y *= rhs.y;
    return *this;
  }

  // True when both components compare equal with ==.
  // const-qualified so that const objects can be compared.
  bool operator==(const double2& rhs) const {
    return (this->x == rhs.x) && (this->y == rhs.y);
  }

  // Negation of operator==.
  bool operator!=(const double2& rhs) const {return !(*this == rhs);}
};
/*
bool operator==(const double2& lhs, const double2& rhs) {
return (lhs.x == rhs.x) && (lhs.y == rhs.y);
}
*/
/*
inline float2 operator*(float2 lhs, const float2& rhs) {
lhs.x *= rhs.x;
lhs.y *= rhs.y;
return lhs;
}
inline double2 operator*(double2 lhs, const double2& rhs) {
lhs.x *= rhs.x;
lhs.y *= rhs.y;
return lhs;
}
*/
// 3D matrix of elements of type T with logical size (nx, ny, nz) and storage
// size (xsize, ysize, zsize). Only the declarations live in this header; the
// member functions (other than the trivial accessors) are defined elsewhere.
//
// NOTE(review): the class holds raw pointers (data, tilebuf_th) and declares a
// destructor but no copy constructor. If the destructor releases those
// pointers (not visible here), copying an instance would be unsafe - confirm
// and consider disabling copies.
template <typename T>
class CpuMatrix3d {
private:

  // True if we are using an external storage for the data
  bool external_storage;

  // Number of entries in tilebuf_th, must be equal to the number of threads!
  int num_tilebuf_th;

  // Storage for tile: tilebuf_th[0..num_tilebuf_th-1][0..tiledim*tiledim-1]
  T** tilebuf_th;

  // Initializes (allocates) data
  // NOTE(review): presumably ext_data, when non-NULL, is used instead of
  // allocating - confirm against the implementation.
  void init(const int size, T* ext_data = NULL);

  // Allocation / release of the tile buffers (tilebuf_th).
  void alloc_tile();
  void dealloc_tile();

  // Helpers defined elsewhere.
  // NOTE(review): presumably used by compare() - confirm.
  double norm(T a, T b);
  bool is_nan(T a);

protected:

  // Tile dimensions
  const int tiledim;

  // Size of the matrix
  const int nx, ny, nz;

  // Size of the matrix in storage. Allows for padding.
  const int xsize, ysize, zsize;

  // Matrix data
  T* data;

public:

  // Construct from the logical size only; tiledim defaults to 64 and ext_data
  // defaults to NULL.
  CpuMatrix3d(const int nx, const int ny, const int nz,
              const int tiledim=64, T* ext_data = NULL);

  // Construct with an explicit storage size (xsize, ysize, zsize) in addition
  // to the logical size.
  CpuMatrix3d(const int nx, const int ny, const int nz,
              const int xsize, const int ysize, const int zsize,
              const int tiledim=64, T* ext_data = NULL);

  // Construct from the logical size and a file name.
  // NOTE(review): presumably the contents are read from `filename` - confirm.
  CpuMatrix3d(const int nx, const int ny, const int nz, const char *filename,
              const int tiledim=64, T* ext_data = NULL);

  ~CpuMatrix3d();

  void print_info();

  // NOTE(review): presumably compares this matrix against `mat` using
  // tolerance `tol` and reports the largest difference through `max_diff` -
  // confirm against the implementation.
  bool compare(CpuMatrix3d<T>& mat, const double tol, double& max_diff);

  // --- Transposes -----------------------------------------------------------
  // Each transpose comes as a sub-block overload taking source origin
  // (src_x0, src_y0, src_z0), destination origin (dst_x0, dst_y0, dst_z0) and
  // extents (xlen, ylen, zlen), plus a whole-matrix overload taking only `mat`.
  // NOTE(review): presumably `mat` receives the result and the _ref / _legacy
  // variants are alternative implementations of the same operation - confirm.

  void transpose_yzx_ref(const int src_x0, const int src_y0, const int src_z0,
                         const int dst_x0, const int dst_y0, const int dst_z0,
                         const int xlen, const int ylen, const int zlen,
                         CpuMatrix3d<T>& mat);
  void transpose_yzx_ref(CpuMatrix3d<T>& mat);

  void transpose_zxy_ref(const int src_x0, const int src_y0, const int src_z0,
                         const int dst_x0, const int dst_y0, const int dst_z0,
                         const int xlen, const int ylen, const int zlen,
                         CpuMatrix3d<T>& mat);
  void transpose_zxy_ref(CpuMatrix3d<T>& mat);

  // NOTE(review): `order` is presumably one of YZX / ZXY - confirm.
  void transpose(const int src_x0, const int src_y0, const int src_z0,
                 const int dst_x0, const int dst_y0, const int dst_z0,
                 const int xlen, const int ylen, const int zlen,
                 CpuMatrix3d<T>& mat, const int order);

  void transpose_yzx(const int src_x0, const int src_y0, const int src_z0,
                     const int dst_x0, const int dst_y0, const int dst_z0,
                     const int xlen, const int ylen, const int zlen,
                     CpuMatrix3d<T>& mat);
  void transpose_yzx(CpuMatrix3d<T>& mat);

  void transpose_zxy(const int src_x0, const int src_y0, const int src_z0,
                     const int dst_x0, const int dst_y0, const int dst_z0,
                     const int xlen, const int ylen, const int zlen,
                     CpuMatrix3d<T>& mat);
  void transpose_zxy(CpuMatrix3d<T>& mat);

  void transpose_yzx_legacy(CpuMatrix3d<T>& mat);
  void transpose_yzx_legacy(const int src_x0, const int src_y0, const int src_z0,
                            const int dst_x0, const int dst_y0, const int dst_z0,
                            const int xlen, const int ylen, const int zlen,
                            CpuMatrix3d<T>& mat);

  void transpose_zxy_legacy(CpuMatrix3d<T>& mat);
  void transpose_zxy_legacy(const int src_x0, const int src_y0, const int src_z0,
                            const int dst_x0, const int dst_y0, const int dst_z0,
                            const int xlen, const int ylen, const int zlen,
                            CpuMatrix3d<T>& mat);

  // --- Copy -----------------------------------------------------------------
  // Sub-block and whole-matrix overloads, same parameter layout as the
  // transposes above.
  void copy(int src_x0, int src_y0, int src_z0,
            int dst_x0, int dst_y0, int dst_z0,
            int xlen, int ylen, int zlen,
            CpuMatrix3d<T>& mat);
  void copy(CpuMatrix3d<T>& mat);

  // --- I/O ------------------------------------------------------------------
  void print(const int x0, const int x1,
             const int y0, const int y1,
             const int z0, const int z1);

  void load(const int x0, const int x1, const int nx,
            const int y0, const int y1, const int ny,
            const int z0, const int z1, const int nz,
            const char *filename);
  void load(const int nx, const int ny, const int nz,
            const char *filename);

  // NOTE(review): presumably multiplies every element by `fac` - confirm.
  void scale(const T fac);

  // --- Accessors ------------------------------------------------------------
  // const-qualified so they can be called through a const CpuMatrix3d&.
  int get_nx() const {return nx;}
  int get_ny() const {return ny;}
  int get_nz() const {return nz;}
  int get_xsize() const {return xsize;}
  int get_ysize() const {return ysize;}
  int get_zsize() const {return zsize;}

  // Raw pointer to the matrix data; the const overload hands out a
  // pointer-to-const so a const matrix cannot be modified through it.
  T* get_data() {return data;}
  const T* get_data() const {return data;}
};
#endif // CPUMATRIX3D_H