-
Notifications
You must be signed in to change notification settings - Fork 133
/
Copy pathtutorial_04A.cpp
101 lines (74 loc) · 3.36 KB
/
tutorial_04A.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
    This tutorial shows how to write a simple matrix multiplication (C = A * B):

    for i = 0 .. N
        for j = 0 .. N
            C[i,j] = 0;
            for k = 0 .. N
                C[i,j] = C[i,j] + A[i,k] * B[k,j];

    To run this tutorial:

    cd build/
    make run_developers_tutorial_04A
*/
#include <tiramisu/tiramisu.h>
#define SIZE0 100
using namespace tiramisu;
int main(int argc, char **argv)
{
tiramisu::init("matmul");
// -------------------------------------------------------
// Layer I
// -------------------------------------------------------
constant p0("N", expr((int32_t) SIZE0));
var i("i", 0, p0), j("j", 0, p0), k("k", 0, p0);
// Declare computations that represents the input buffers. The actual
// input buffers will be declared later.
input A("A", {"i", "j"}, {SIZE0, SIZE0}, p_uint8);
input B("B", {"i", "j"}, {SIZE0, SIZE0}, p_uint8);
// Declare a computation to initialize the reduction.
computation C_init("C_init", {i,j}, expr((uint8_t) 0));
// Declare the reduction operation. Do not provide any expression during declaration.
computation C("C", {i,j,k}, p_uint8);
// Note that the previous computation has an empty expression (because we can only use C in an expression after its declaration)
C.set_expression(C(i, j, k - 1) + A(i, k) * B(k, j));
// In this example, C does not read the value of C_init, but later
// we indicate that C_init and C both are stored in the same buffer,
// therefore C will read values written by C_init.
// We are working on adding an operator for reduction to perform reduction
// in a straight forward way.
// -------------------------------------------------------
// Layer II
// -------------------------------------------------------
// Tile both computations: C_init and C
// This tiles the loop levels i and j and produces the loop levels by a 32x32 tile.
// i0, j0, i1 and j1 where i0 is the outermost loop level and j1 is the innermost.
var i0("i0"), j0("j0"), i1("i1"), j1("j1");
C_init.tile(i, j, 32, 32, i0, j0, i1, j1);
C.tile(i, j, 32, 32, i0, j0, i1, j1);
// Parallelize the outermost loop level i0
C.parallelize(i0);
// Indicate that C is after C_init at the loop level j0 (this means,
// they share the two outermost loops i0 and j0 and starting from j0 C
// is ordered after C_init).
C.after(C_init, j1);
// -------------------------------------------------------
// Layer III
// -------------------------------------------------------
// Declare the buffers.
buffer b_A("b_A", {expr(SIZE0), expr(SIZE0)}, p_uint8, a_input);
buffer b_B("b_B", {expr(SIZE0), expr(SIZE0)}, p_uint8, a_input);
buffer b_C("b_C", {expr(SIZE0), expr(SIZE0)}, p_uint8, a_output);
// Map the computations to a buffer.
A.store_in(&b_A);
B.store_in(&b_B);
// Store C_init[i,j] in b_C[i,j]
C_init.store_in(&b_C, {i,j});
// Store c_C[i,j,k] in b_C[i,j]
C.store_in(&b_C, {i,j});
// Note that both of the computations C_init and C store their
// results in the buffer b_C.
// -------------------------------------------------------
// Code Generation
// -------------------------------------------------------
tiramisu::codegen({&b_A, &b_B, &b_C}, "build/generated_fct_developers_tutorial_04A.o");
return 0;
}