-
Notifications
You must be signed in to change notification settings - Fork 6
/
omult22.a
112 lines (103 loc) · 3.68 KB
/
omult22.a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
; omult22.a
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
; - adjusted to use fixed zero page addresses
; - removed 'decrement to avoid clc' as this is slower on average
; - rearranged memory use to remove final memory copy and give LSB first order to result
; - X counter counts up from $fc to avoid cpx
; - i.e. mult60 expanded into 32 bit x 32 bit
; - could be unrolled for more speed at the cost of more memory
;
;
;           multiplicand
;   +------+------+------+------+
;   |  +3  |  +2  |  +1  |  +0  |
;   +------+------+------+------+
;                ||
;               _||_  add
;               \  /                    initially set to
;     result     \/                        multiplier
;   +------+------+------+------+ +------+------+------+------+
;   |  +7  |  +6  |  +5  |  +4  | |  +3  |  +2  |  +1  |  +0  |
;   +------+------+------+------+ +------+------+------+------+
;
; (1) first 8 times around loop, shift right: result+7 into +6 into +5 into +4 into +0:
;
; ----------------------------> shift >
; \_____________________/
;
;
; (2) next 8 times around loop, shift right: result+7 into +6 into +5 into +4 into +1:
;
; ----------------------------> shift >
; \______________/
;
;
; (3) next 8 times around loop, shift right: result+7 into +6 into +5 into +4 into +2:
;
; ----------------------------> shift >
; \_______/
;
;
; (4) final 8 times around loop, shift right: result+7 into +6 into +5 into +4 into +3:
;
; -------------------------------> shift
;
;
; 32 bit x 32 bit unsigned multiply, 64 bit result
; Average cycles: 1653.00
; 59 bytes
; Fixed zero page workspace used by mult. All values are stored least significant byte first.
; 'multiplier' and 'result' are deliberately the same address: the low four bytes of the
; product replace the multiplier as its bits are shifted out.
; NOTE: result+4 has to stay in zero page, because mult indexes it with X = $fc..$ff and
; relies on zero page,X addressing wrapping round to reach result+0..result+3.
multiplicand = $02 ; 4 bytes ($02-$05), only read by mult
multiplier = $06 ; 4 bytes ($06-$09), overwritten by the low half of the product
result = $06 ; 8 bytes ($06-$0d) (note: shares memory with multiplier)
; Set the assembly address for the code that follows.
* = $0200
; mult: 32 bit x 32 bit unsigned multiply, 64 bit result (shift and add, one multiplier bit per pass)
;
; On Entry:
; multiplier: four byte value, least significant byte first
; multiplicand: four byte value, least significant byte first
; On Exit:
; result: eight byte product, least significant byte first
; (note: 'result' shares memory with 'multiplier', so the multiplier is overwritten)
; multiplicand: unchanged (it is only ever read)
; A = result+7 (most significant byte of the product), X = 0, Y = 0, carry clear
;
; Assumptions:
; - Decimal mode is off. No cld is executed here, and the adc chain needs binary arithmetic.
; - 'result+4,x' is assembled in its zero page,X form, so that with X = $fc..$ff the
; effective address wraps round within page zero to result+0..result+3.
mult
lda #0 ; A stands in for result+7 throughout the routine
sta result+6 ;
sta result+5 ;
sta result+4 ; 32 bits of zero in A, result+6, result+5, result+4
; (think of A as a local cache of result+7)
; Note: First 8 shifts are A -> result+6 -> result+5 -> result+4 -> result
; Next 8 shifts are A -> result+6 -> result+5 -> result+4 -> result+1
; Next 8 shifts are A -> result+6 -> result+5 -> result+4 -> result+2
; Final 8 shifts are A -> result+6 -> result+5 -> result+4 -> result+3
ldx #$fc ; outer loop counter: runs $fc, $fd, $fe, $ff and stops when inx wraps it to 0 (no cpx needed)
; outer loop (4 times), one pass per multiplier byte
outer_loop
ldy #8 ; count for inner loop: 8 bits per multiplier byte
lsr result+4,x ; think "result" then later "result+1" then "result+2" then "result+3"; bit 0 -> carry, 0 -> bit 7
; inner loop (8 times)
; Invariant on entry: carry holds the multiplier bit to process next.
inner_loop
; multiplier bit clear: nothing to add, carry (0) becomes the bit shifted into A below
bcc +
; (result+4, result+5, result+6, A) += (multiplicand, multiplicand+1, multiplicand+2, multiplicand+3)
sta result+7 ; remember A (A is needed as the adc accumulator)
lda result+4
; carry is set here (the bcc fell through), so it must be cleared before the first adc
clc
adc multiplicand
sta result+4
lda result+5
adc multiplicand+1
sta result+5
lda result+6
adc multiplicand+2
sta result+6
lda result+7 ; recall A
; the carry out of this last adc is the 33rd bit of the sum; the ror below moves it into bit 7 of A
adc multiplicand+3
+
ror ; shift the 33 bit value carry:A:result+6:result+5:result+4 right by one bit
ror result+6 ;
ror result+5 ;
ror result+4 ; bit 0 of result+4 falls out into carry
ror result+4,x ; think "result" then later "result+1" then "result+2" then "result+3"; product bit -> bit 7, next multiplier bit -> carry
; dey and bne leave carry alone, so the multiplier bit survives to the bcc at inner_loop
dey
bne inner_loop ; go back for 1 more shift?
; after the 8th shift the bit left in carry is the 0 that the lsr put into bit 7
inx
bne outer_loop ; go back for 8 more shifts?
sta result+7 ; write the cached most significant byte back to memory
rts