-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab6.2Yuvan.asm
215 lines (159 loc) · 5.01 KB
/
lab6.2Yuvan.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# Lab 6 CS 10
# Program by Yuvan Sharma
# multiplies two 8x8 matrices and prints the result using cache blocking
# RESULTS:
# WITHOUT CACHE BLOCKING:
# Direct mapped cache, 4 blocks, block size of 8 words (TOTAL CACHE SIZE 128 BYTES) -> 49% hit rate
# 2 way assoc. cache, 4 blocks, block size of 8 words (TOTAL CACHE SIZE 128 BYTES) -> 58% hit rate
# WITH CACHE BLOCKING:
# Direct mapped cache, 4 blocks, block size of 8 words (TOTAL CACHE SIZE 128 BYTES) -> 63% hit rate
# 2 way assoc. cache, 4 blocks, block size of 8 words (TOTAL CACHE SIZE 128 BYTES) -> 64% hit rate
# Direct mapped cache, 8 blocks, block size of 8 words (TOTAL CACHE SIZE 256 BYTES) -> 74% hit rate
# 2 way assoc. cache, 8 blocks, block size of 8 words (TOTAL CACHE SIZE 256 BYTES) -> 80% hit rate
# COMMENTS:
# the results above show how cache blocking has a big impact on the hit rate, with a 14% improvement
# for a direct mapped cache of size 128 bytes. Making the 128 byte cache 2 way associative after blocking
# did not improve the hit rate very much, but a 2 way associative cache performed much better than the direct
# mapped cache when the cache size was increased from 128 to 256 bytes.
.data
.align 2
prompt: .asciiz "Enter Row "
.align 2
prompt2: .asciiz " : "
newline: .asciiz "\n"
space: .asciiz " "
.align 2
prompt3: .asciiz "Row "
.align 2
M: .word 6,2,4,2,5,6,3,8,
8,4,4,6,5,6,9,1,
2,6,7,8,4,1,5,8,
2,1,2,9,3,5,2,5,
6,6,4,3,5,6,0,7,
7,3,2,2,6,4,6,7,
5,4,1,5,6,9,8,9,
3,0,8,0,3,5,5,6 # 8x8 integer matrix #1
.align 2
N: .word 4,5,0,2,1,6,3,8,
5,9,1,3,0,6,4,3,
2,6,7,8,4,7,5,1,
1,7,2,8,3,4,2,5,
6,9,4,2,5,6,6,2,
7,3,4,2,6,4,6,7,
5,7,1,5,6,3,3,4,
3,0,8,0,3,1,5,6 # 8x8 integer matrix #1
.align 2
result: .space 256 # 8x8 integer matrix to store multiplication result
mask: .word 15 # 15 is the mask to convert char digits to int
.text
li $t0, 1
la $t1, M
jal printloop
la $a0, newline
li $v0, 4
syscall
li $t0, 1
la $t1, N
jal printloop
la $a1, result
la $a2, M
la $a3, N
jal dgemm
la $a0, newline
li $v0, 4
syscall
li $t0, 1
la $t1, result
jal printloop
j exit # exit the program
exit:
li $v0, 10
syscall #exit the program
dgemm:
addi $sp, $sp, -4
sw $ra, 0($sp)
li $s3, 0 # sj
li $s4, 0 # si
li $s5, 0 # sk
li $s6, 8 # 8 is the size of the matrix
B1:
li $s4, 0
B2:
li $s5, 0
B3:
# sj in $s3, si in $s4, sk in $s5, n = 8
jal do_block
addi $s5, $s5, 4
bltu $s5, $s6, B3
addi $s4, $s4, 4
bltu $s4, $s6, B2
addi $s3, $s3, 4 # 4 is the block size
bltu $s3, $s6, B1
lw $ra, 0($sp)
addi $sp, $sp, 4
jr $ra
do_block:
# addi $t0, $zero, 8 # $t0 holds the number of elements in each row
add $t1, $s3, $zero # counter for first loop $t1 = i
addi $t6, $s3, 4 # 4 is the block size
addi $t7, $s4, 4
addi $t8, $s5, 4
L1:
add $t2, $s4, $zero # counter for second loop
L2:
add $t3, $s5, $zero # counter for third loop
sll $t4, $t1, 3 # multiply first counter by 8 to "skip" that many rows
add $t4, $t4, $t2 # add the second counter value to select the required element in the row
sll $t4, $t4, 2 # multiply our index by 4 to transform it into a byte index, since one int = 4 bytes
add $t4, $a1, $t4 # add the byte index to address of the result array, to tranform t4 into address of result[][] element
lw $s0, 0($t4)
L3:
sll $t5, $t1, 3 # multiply counter for third loop by 8 to skip that many rows
add $t5, $t5, $t3 # add first counter value to select the required element in the row
sll $t5, $t5, 2 # multiply index by 4 to transform into byte index, since one int = 4 bytes
add $t5, $a2, $t5 # add byte index to address of M array, to transform $t5 to address of M[][] element
lw $s1, 0($t5)
sll $t5, $t3, 3 # multiply counter of second loop by 8 to skip that many rows
add $t5, $t5, $t2 # add third counter value to select the required element in the row
sll $t5, $t5, 2 # multiply index by 4 to transform into byte index, since one int = 4 bytes
add $t5, $a3, $t5 # add byte index to addres of N array, to transform $t5 to address of N[][] element
lw $s2, 0($t5)
# s0 has result element, s1 has M element, s2 has N element
mul $s1, $s2, $s1
add $s0, $s0, $s1
addi $t3, $t3, 1 # increment 3rd loop counter by 1
blt $t3, $t8, L3 # if 3rd loop counter < $t8, repeat addition
sw $s0, 0($t4) # otherwise store the computed value in the appropriate element
addi $t2, $t2, 1
blt $t2, $t7, L2
addi $t1, $t1, 1
blt $t1, $t6, L1
jr $ra
printloop:
la $a0, prompt3 # prompt3 = "Row "
li $v0, 4
syscall
addi $a0, $t0, 0 # to print row number
li $v0, 1
syscall
la $a0, prompt2 # prompt2 = ": "
li $v0, 4
syscall
li $t2, 1 # counter for printloop2
li $t9, 9
printloop2:
li $v0, 1 # print element
lw $a0, 0($t1)
syscall
li $v0, 4 # print space
la $a0, space
syscall
addi $t2, $t2, 1
addi $t1, $t1, 4
bne $t2, $t9, printloop2
la $a0, newline # go to next line
li $v0, 4
syscall
addi $t0, $t0, 1
bne $t0, $t9, printloop # $t9 has 9
jr $ra