-
Notifications
You must be signed in to change notification settings - Fork 9
/
Defines.h
210 lines (184 loc) · 5.39 KB
/
Defines.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#pragma once
#include <malloc.h>
#include <stdlib.h>
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>
namespace CudaTracerLib {
// ---- Build-time configuration switches -------------------------------------
// EXT_TRI is always defined by this header; the two numeric constants below
// are fixed at compile time.
#define EXT_TRI
#define NUM_UV_SETS 1
#define MAX_AREALIGHT_NUM 2

// ISWINDOWS is defined whenever the compiler defines _MSC_VER.
#if defined(_MSC_VER)
#define ISWINDOWS
#endif

// CTL_EXPORT decorates the declarations exported from / imported into a DLL.
//  - CTL_EXPORT_SYMBOLS defined -> __declspec(dllexport)
//  - CTL_IMPORT_SYMBOLS defined -> __declspec(dllimport)
//  - neither defined            -> expands to nothing
// The two switches are checked independently, so defining both redefines
// CTL_EXPORT (the import form is the one that remains).
#if defined(CTL_EXPORT_SYMBOLS)
#define CTL_EXPORT __declspec(dllexport)
#endif
#if defined(CTL_IMPORT_SYMBOLS)
#define CTL_EXPORT __declspec(dllimport)
#endif
#if !defined(CTL_EXPORT)
#define CTL_EXPORT
#endif
// Inlining keyword referenced by CUDA_FUNC_IN and CUDA_ONLY_FUNC in the CUDA branch below;
// __forceinline__ is the alternative that could be substituted here.
#define CUDA_INLINE inline
// Portable spellings of the CUDA qualifiers. When __CUDACC__ is defined the macros expand to
// the real CUDA keywords; otherwise they expand to plain `inline`, `virtual` or nothing, so the
// same declarations can also be compiled without the CUDA keywords.
#ifdef __CUDACC__
// Function usable from host and device code (hard-codes `inline` rather than CUDA_INLINE).
#define CUDA_FUNC inline __host__ __device__
// Same as CUDA_FUNC but routed through CUDA_INLINE.
#define CUDA_FUNC_IN CUDA_INLINE __host__ __device__
// Device-only inline function.
#define CUDA_ONLY_FUNC __device__ CUDA_INLINE
#define CUDA_HOST __host__
#define CUDA_DEVICE __device__
#define CUDA_CONST __constant__
#define CUDA_SHARED __shared__
#define CUDA_GLOBAL __global__
#define CUDA_LOCAL __local__
#define CUDA_VIRTUAL __device__ virtual
#else
// No CUDA keywords available: function macros degrade to `inline`, the rest to nothing.
#define CUDA_FUNC inline
#define CUDA_FUNC_IN inline
#define CUDA_ONLY_FUNC inline
#define CUDA_HOST
#define CUDA_DEVICE
#define CUDA_CONST
#define CUDA_SHARED
#define CUDA_GLOBAL
#define CUDA_LOCAL
#define CUDA_VIRTUAL virtual
#endif
// ISCUDA mirrors __CUDA_ARCH__: it is defined only where __CUDA_ARCH__ is defined, and is used
// in this header to select device-side code paths.
#ifdef __CUDA_ARCH__
#define ISCUDA
#endif
// Maps __func__ onto the compiler's __FUNCTION__ when no macro named __func__ exists.
// NOTE(review): in standard C++11 __func__ is a predefined identifier, not a macro, so this
// #ifndef is true on conforming compilers as well and the alias is always installed - confirm
// that this is intended.
#ifndef __func__
#define __func__ __FUNCTION__
#endif
// CTL_ASSERT(X): assertion macro whose behaviour depends on the build.
//  - CUDA_RELEASE_BUILD not defined, __CUDACC__ defined:
//      on failure prints "Assertion failed!", file:line and the expression text via printf,
//      then execution continues.
//  - CUDA_RELEASE_BUILD not defined, __CUDACC__ not defined:
//      on failure throws std::runtime_error carrying the same message.
//  - CUDA_RELEASE_BUILD defined:
//      X is still evaluated (it may have side effects) but the result is ignored.
// The helpers used by the expansion are fully qualified so the macro resolves to the
// CudaTracerLib functions no matter which namespace it is expanded in.
#if !defined(CUDA_RELEASE_BUILD)
#ifdef __CUDACC__
#define CTL_ASSERT(X) ((X) ? ((void)0) : (void)printf("Assertion failed!\n%s:%d\n%s", __FILE__, __LINE__, #X))
#else
#define CTL_ASSERT(X) ((X) ? ((void)0) : throw std::runtime_error(::CudaTracerLib::format("Assertion failed!\n%s:%d\n%s", __FILE__, __LINE__, #X)))
#endif
#else
//evaluate the expression either way, it possibly has side effects
// Empty function giving both arms of the release-mode conditional something to call.
CUDA_FUNC_IN void noop() {}
# define CTL_ASSERT(X) ((X) ? ::CudaTracerLib::noop() : ::CudaTracerLib::noop())
#endif
//code is adapted from this great answer : http://stackoverflow.com/a/26221725/1715849
// Builds a std::string from a printf-style format string.
//  format : printf-style format specification.
//  args   : values forwarded by value to snprintf; they must be types that may be passed
//           through a C variadic call (e.g. pass str.c_str(), not a std::string).
//  returns: the formatted text without the terminating '\0'.
//  throws : std::runtime_error when snprintf reports a formatting/encoding error.
template<typename ... Args> std::string format(const std::string& format, Args ... args)
{
    // First pass with a null buffer only measures the required length. snprintf signals
    // failure with a negative value, which must be checked before it is turned into a size.
    const int required = std::snprintf(nullptr, 0, format.c_str(), args ...);
    if (required < 0)
        throw std::runtime_error("Error during formatting.");

    const size_t size = static_cast<size_t>(required) + 1; // Extra space for '\0'
    std::unique_ptr<char[]> buf(new char[size]);
    std::snprintf(buf.get(), size, format.c_str(), args ...);
    return std::string(buf.get(), buf.get() + size - 1); // We don't want the '\0' inside
}
//http://stackoverflow.com/questions/12778949/cuda-memory-alignment
//credit to harrism!
// CUDA_ALIGN(n): compiler-independent alignment specifier. It selects the native spelling for
// NVCC, GCC-compatible compilers or MSVC, and stops the build on any other compiler.
#if defined(__CUDACC__) // NVCC
#define CUDA_ALIGN(n) __align__(n)
#elif defined(__GNUC__) // GCC
#define CUDA_ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER) // MSVC
#define CUDA_ALIGN(n) __declspec(align(n))
#else
#error "Please provide a definition for CUDA_ALIGN macro for your host compiler!"
#endif
// Declared here, defined elsewhere. Receives the call site (file, line) followed by a variadic
// list of values.
// NOTE(review): the variadic values are presumably CUDA status codes (this header passes the
// results of cudaGetSymbolAddress / cudaMemcpy) - confirm against the definition.
CTL_EXPORT void __ThrowCudaErrors__(const char* file, int line, ...);
// Convenience wrapper: forwards __FILE__ and __LINE__, then the caller's arguments, then a
// trailing -1 (presumably the end-of-list marker read by the callee - confirm).
// `##__VA_ARGS__` is the GNU comma-swallowing extension, so the macro may be used with no
// arguments at all.
#define ThrowCudaErrors(...) __ThrowCudaErrors__(__FILE__, __LINE__, ##__VA_ARGS__, -1)
// Exchanges the values of `a` and `b` using one temporary copy and two assignments.
// Declared CUDA_FUNC_IN so the same implementation is available wherever that macro allows.
template<typename T> CUDA_FUNC_IN void swapk(T& a, T& b)
{
    T saved = a; // keep the old value of a while it is overwritten
    a = b;
    b = saved;
}
// Returns a flat thread index for a launch that uses the x and y dimensions of both the grid
// and the block: blocks are numbered row by row (x fastest), and threads inside a block
// likewise. When ISCUDA is not defined the function returns 0.
CUDA_FUNC_IN unsigned int getGlobalIdx_2D_2D()
{
#ifndef ISCUDA
    return 0u;
#else
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned int blockLinear = blockIdx.y * gridDim.x + blockIdx.x;
    const unsigned int threadInBlock = threadIdx.y * blockDim.x + threadIdx.x;
    return blockLinear * threadsPerBlock + threadInBlock;
#endif
}
// Compile-time friendly maximum of 2..9 values. DMAX2 is the primitive; each DMAXn is built by
// folding one more argument into DMAX(n-1).
// Caution: these are macros, so every argument is expanded (and may be evaluated) more than
// once - do not pass expressions with side effects.
#define DMAX2(A, B) ((A) > (B) ? (A) : (B))
#define DMAX3(A, B, C) DMAX2(DMAX2(A, B), C)
#define DMAX4(A, B, C, D) DMAX2(DMAX3(A, B, C), D)
#define DMAX5(A, B, C, D, E) DMAX2(DMAX4(A, B, C, D), E)
#define DMAX6(A, B, C, D, E, F) DMAX2(DMAX5(A, B, C, D, E), F)
#define DMAX7(A, B, C, D, E, F, G) DMAX2(DMAX6(A, B, C, D, E, F), G)
#define DMAX8(A, B, C, D, E, F, G, H) DMAX2(DMAX7(A, B, C, D, E, F, G), H)
#define DMAX9(A, B, C, D, E, F, G, H, I) DMAX2(DMAX8(A, B, C, D, E, F, G, H), I)
// Minimum counterparts of the DMAXn family, built the same way on top of DMIN2.
// The same multiple-evaluation caveat applies.
#define DMIN2(A, B) ((A) < (B) ? (A) : (B))
#define DMIN3(A, B, C) DMIN2(DMIN2(A, B), C)
#define DMIN4(A, B, C, D) DMIN2(DMIN3(A, B, C), D)
#define DMIN5(A, B, C, D, E) DMIN2(DMIN4(A, B, C, D), E)
#define DMIN6(A, B, C, D, E, F) DMIN2(DMIN5(A, B, C, D, E), F)
#define DMIN7(A, B, C, D, E, F, G) DMIN2(DMIN6(A, B, C, D, E, F), G)
#define DMIN8(A, B, C, D, E, F, G, H) DMIN2(DMIN7(A, B, C, D, E, F, G), H)
#define DMIN9(A, B, C, D, E, F, G, H, I) DMIN2(DMIN8(A, B, C, D, E, F, G, H), I)
// RND_UP(VAL, MOD): rounds VAL up to the next multiple of MOD; a VAL that is already a
// multiple is returned unchanged. Every use of an argument is parenthesized so that
// low-precedence expressions such as `a | b` or `n << 1` are rounded as a whole.
// The arguments are expanded several times - do not pass expressions with side effects.
#define RND_UP(VAL, MOD) ((VAL) + (((VAL) % (MOD)) != 0 ? ((MOD) - ((VAL) % (MOD))) : (0)))
// RND_16(VAL): RND_UP with a fixed multiple of 16.
#define RND_16(VAL) RND_UP(VAL, 16)
// Declared here, defined elsewhere. Takes a destination pointer and a length.
// NOTE(review): judging by the name and its use with addresses from cudaGetSymbolAddress,
// `dest` is presumably a device pointer and `length` a byte count - confirm in the definition.
CTL_EXPORT void CudaSetToZero(void* dest, size_t length);
// Declared here, defined elsewhere.
// NOTE(review): presumably releases a buffer kept internally by CudaSetToZero - confirm.
CTL_EXPORT void CudaSetToZero_FreeBuffer();
// Typed convenience wrapper around CudaSetToZero: passes `cudaVar` together with the size of
// exactly one T.
template<typename T> inline void ZeroMemoryCuda(T* cudaVar)
{
    const size_t byteCount = sizeof(T);
    CudaSetToZero(cudaVar, byteCount);
}
// Helpers for working with device symbols through their address. Each macro resolves the
// symbol with cudaGetSymbolAddress and reports failures through ThrowCudaErrors.
// They deliberately expand to a plain brace block (not do { } while (0)) so that existing
// call sites written with or without a trailing ';' keep compiling; because of that, avoid
// using them as the unbraced body of an `if` that has an `else`.
// The local variable is named `tar`, so the arguments must not refer to a caller variable of
// that name.

// ZeroSymbol(SYMBOL): passes the symbol's address and sizeof(SYMBOL) to CudaSetToZero.
#define ZeroSymbol(SYMBOL) \
{ \
    void* tar = 0; \
    ThrowCudaErrors(cudaGetSymbolAddress(&tar, SYMBOL)); \
    CudaSetToZero(tar, sizeof(SYMBOL)); \
}
// CopyToSymbol(SYMBOL, value): host -> device copy of sizeof(value) bytes from `value` to the
// symbol's address. `value` must be an lvalue; it is parenthesized so any lvalue expression
// can be passed.
#define CopyToSymbol(SYMBOL, value) \
{ \
    void* tar = 0; \
    ThrowCudaErrors(cudaGetSymbolAddress(&tar, SYMBOL)); \
    ThrowCudaErrors(cudaMemcpy(tar, &(value), sizeof(value), cudaMemcpyHostToDevice)); \
}
// CopyFromSymbol(value, SYMBOL): device -> host copy of sizeof(value) bytes from the symbol's
// address into `value`, which must be a writable lvalue.
#define CopyFromSymbol(value, SYMBOL) \
{ \
    void* tar = 0; \
    ThrowCudaErrors(cudaGetSymbolAddress(&tar, SYMBOL)); \
    ThrowCudaErrors(cudaMemcpy(&(value), tar, sizeof(value), cudaMemcpyDeviceToHost)); \
}
// Raw storage for one T that can be accessed as if it were a T.
// The wrapper holds a 256-byte aligned byte array of sizeof(T) bytes. Its constructor is empty
// and it declares no destructor, so the wrapper itself never constructs or destroys the T; the
// bytes are simply reinterpreted on access.
// NOTE(review): presumably meant for statically allocated CUDA variables whose type could not
// otherwise be declared in that position - confirm against the call sites.
template<typename T> struct CudaStaticWrapper
{
protected:
    // Backing bytes for the wrapped object.
    CUDA_ALIGN(256) unsigned char m_data[sizeof(T)];
public:
    // Intentionally empty: the storage is left untouched.
    CUDA_FUNC_IN CudaStaticWrapper()
    {
    }

    // Primary accessors: view the storage as a T.
    CUDA_FUNC_IN T& As()
    {
        return *reinterpret_cast<T*>(m_data);
    }
    CUDA_FUNC_IN const T& As() const
    {
        return *reinterpret_cast<const T*>(m_data);
    }

    // Implicit conversions to a reference to the wrapped object.
    CUDA_FUNC_IN operator T& ()
    {
        return As();
    }
    CUDA_FUNC_IN operator const T& () const
    {
        return As();
    }

    // Pointer-like access to the wrapped object.
    CUDA_FUNC_IN T& operator*()
    {
        return As();
    }
    CUDA_FUNC_IN const T& operator*() const
    {
        return As();
    }
    CUDA_FUNC_IN T* operator->()
    {
        return &As();
    }
    CUDA_FUNC_IN const T* operator->() const
    {
        return &As();
    }
};
}