#!/usr/bin/env python
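"""Kernel Tuner example: tuning vector_add in mixed precision.

Tunes a templated CUDA vector_add kernel over both the thread block size and
the floating-point type (half, float, double), and uses accuracy observers to
report the error of each configuration against a double-precision reference.
"""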
import numpy
from pprint import pprint
from kernel_tuner import tune_kernel
from kernel_tuner.accuracy import TunablePrecision, AccuracyObserver

def tune():
    # Templated CUDA kernel: the element type T is instantiated through the
    # "float_type" tunable parameter when the kernel is compiled.
    kernel_string = """
    #include <cuda_fp16.h>

    using half = __half;

    template <typename T>
    __global__ void vector_add(int n, const T* left, const T* right, T* output) {
        int i = blockDim.x * blockIdx.x + threadIdx.x;

        if (i < n) {
            output[i] = left[i] + right[i];
        }
    }
    """

    # Problem size and double-precision reference inputs. TunablePrecision
    # wraps each array so it is converted to the precision selected by the
    # "float_type" tunable parameter for every configuration.
    size = 100000000
    n = numpy.int32(size)
    a = numpy.random.randn(size).astype(numpy.float64)
    b = numpy.random.randn(size).astype(numpy.float64)
    c = numpy.zeros_like(b)

    args = [
        n,
        TunablePrecision("float_type", a),
        TunablePrecision("float_type", b),
        TunablePrecision("float_type", c),
    ]

    # Reference output, computed in double precision, used to verify results
    # and to compute the error metrics.
    answer = [None, None, None, a + b]

    tune_params = dict()
    tune_params["block_size_x"] = [32, 64, 128, 256, 512, 1024]
    tune_params["float_type"] = ["float", "double", "half"]

    # Observers that report the error of each configuration relative to the
    # reference answer: root-mean-square error and mean relative error.
    observers = [
        AccuracyObserver("RMSE", "error_rmse"),
        AccuracyObserver("MRE", "error_relative"),
    ]

    results, env = tune_kernel(
        "vector_add<float_type>",
        kernel_string,
        size,
        args,
        tune_params,
        answer=answer,
        observers=observers,
        lang="CUDA",
    )

    pprint(results)


if __name__ == "__main__":
    tune()