-
Notifications
You must be signed in to change notification settings - Fork 5
/
isqrt.S
203 lines (187 loc) · 2.99 KB
/
isqrt.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
;-----------------------------------------------------------------------------;
; Fast integer squareroot routines for avr-gcc project (C)ChaN, 2008
; http://elm-chan.org/docs/avrlib/sqrt32.S
;-----------------------------------------------------------------------------;
; uint16_t isqrt32 (uint32_t n);
; uint8_t isqrt16 (uint16_t n);
; uint16_t ihypot (int16_t x, int16_t y);
;-----------------------------------------------------------------------------:
; 32bit integer squareroot
;-----------------------------------------------------------------------------;
; uint16_t isqrt32 (
; uint32_t n
; );
;
; Return Value:
; Squareroot of n.
;
; Size = 53 words
; Clock = 532..548 cycles
; Stack = 0 byte
.global isqrt32
.func isqrt32
isqrt32:
clr r0
clr r18
clr r19
clr r20
ldi r21, 1
clr r27
clr r30
clr r31
ldi r26, 16
1: lsl r22
rol r23
rol r24
rol r25
rol r0
rol r18
rol r19
rol r20
lsl r22
rol r23
rol r24
rol r25
rol r0
rol r18
rol r19
rol r20
brpl 2f
add r0, r21
adc r18, r27
adc r19, r30
adc r20, r31
rjmp 3f
2: sub r0, r21
sbc r18, r27
sbc r19, r30
sbc r20, r31
3: lsl r21
rol r27
rol r30
andi r21, 0b11111000
ori r21, 0b00000101
sbrc r20, 7
subi r21, 2
dec r26
brne 1b
lsr r30
ror r27
ror r21
lsr r30
ror r27
ror r21
mov r24, r21
mov r25, r27
ret
.endfunc
;-----------------------------------------------------------------------------:
; 16bit integer squareroot
;-----------------------------------------------------------------------------;
; uint8_t isqrt16 (
; uint16_t n
; );
;
; Return Value:
; Squareroot of n.
;
; Size = 33 words
; Clock = 181..189 cycles
; Stack = 0 byte
.global isqrt16
.func isqrt16
isqrt16:
clr r18
clr r19
ldi r20, 1
clr r21
ldi r22, 8
1: lsl r24
rol r25
rol r18
rol r19
lsl r24
rol r25
rol r18
rol r19
brpl 2f
add r18, r20
adc r19, r21
rjmp 3f
2: sub r18, r20
sbc r19, r21
3: lsl r20
rol r21
andi r20, 0b11111000
ori r20, 0b00000101
sbrc r19, 7
subi r20, 2
dec r22
brne 1b
lsr r21
ror r20
lsr r21
ror r20
mov r24, r20
ret
.endfunc
;-----------------------------------------------------------------------------:
; 16bit integer hypot (megaAVR is required)
;-----------------------------------------------------------------------------;
; uint16_t ihypot (
; int16_t x,
; int16_t y
; );
;
; Return Value:
; Squareroot of (x*x + y*y)
;
; Size = 42 words
; Clock = 581..597 cycles
; Stack = 0 byte
.global ihypot
.func ihypot
ihypot:
clr r26
sbrs r25, 7
rjmp 1f
com r24
com r25
adc r24, r26
adc r25, r26
1: sbrs r23, 7
rjmp 2f
com r22
com r23
adc r22, r26
adc r23, r26
2: mul r22, r22
movw r18, r0
mul r23, r23
movw r20, r0
mul r22, r23
add r19, r0
adc r20, r1
adc r21, r26
add r19, r0
adc r20, r1
adc r21, r26
mul r24, r24
movw r30, r0
mul r25, r25
add r18, r30
adc r19, r31
adc r20, r0
adc r21, r1
mul r24, r25
add r19, r0
adc r20, r1
adc r21, r26
add r19, r0
adc r20, r1
adc r21, r26
movw r24, r20
movw r22, r18
clr r1
rjmp isqrt32
.endfunc