; Source listing of mult86.a (web-page navigation text and line-number gutter removed).
; mult86.a
; from 6502.org, by Repose: http://forum.6502.org/viewtopic.php?p=106519#p106519
;
; 16 bit x 16 bit unsigned multiply, 32 bit result
; Average cycles: 187.07
; 2170 bytes
; How to use:
; jsr init once, before first use
; put the operands in (x0,x1) and (y0,y1), then jsr umult16;
; the result is returned in (z3, A, Y, z0), most significant byte first
; pointers to square tables (zero page, 2 bytes each: low byte, then high byte)
; init stores the table page number in every high byte; the low bytes carry
; the multiplier byte (or its one's complement) and are supplied per call.
p_sqr_lo1 = $8b ; 2 bytes; low byte is x0 itself (see below)
p_sqr_hi1 = $8d ; 2 bytes; umult16 writes x1 to the low byte
p_neg_sqr_lo = $8f ; 2 bytes; umult16 writes x1 eor $ff, later x0 eor $ff, to the low byte
p_neg_sqr_hi = $91 ; 2 bytes; umult16 writes x1 eor $ff, later x0 eor $ff, to the low byte
p_sqr_lo2 = $93 ; 2 bytes; low byte is x1 itself (see below)
p_sqr_hi2 = $95 ; 2 bytes; umult16 writes x0 to the low byte
; the inputs and outputs
; Note: x0 and x1 are NOT adjacent in memory - each one aliases the low byte
; of a table pointer, so storing the multiplier also aims that pointer.
x0 = p_sqr_lo1 ; multiplier, low byte ($8b)
x1 = p_sqr_lo2 ; multiplier, high byte ($93)
y0 = $04 ; multiplicand, low byte
y1 = $05 ; multiplicand, high byte
z0 = $06 ; product byte 0 (least significant), stored in memory
; z1 = $07 is not used: product byte 1 is returned in Y reg
; z2 = $08 is not used: product byte 2 is returned in A reg
z3 = $09 ; product byte 3 (most significant), stored in memory
; ----------------------------------------------------------------------
; Quarter-square lookup tables: four tables of 512 bytes each.
; The origin is page-aligned and every table is exactly two pages long,
; so each table begins on a page boundary.
; The final entry (index 511) of each table is never read, because the
; largest index formed from two byte values is 255+255 = 510.
; ----------------------------------------------------------------------
* = $0200

; sqrlo[n] = low byte of n*n/4, n = 0..511
sqrlo
!for idx, 0, 511 {
        !byte <((idx*idx)/4)
}

; sqrhi[n] = high byte of n*n/4, n = 0..511
sqrhi
!for idx, 0, 511 {
        !byte >((idx*idx)/4)
}

; negsqrlo[n] = low byte of (255-n)*(255-n)/4, n = 0..511
negsqrlo
!for idx, 0, 511 {
        !byte <(((255-idx)*(255-idx))/4)
}

; negsqrhi[n] = high byte of (255-n)*(255-n)/4, n = 0..511
negsqrhi
!for idx, 0, 511 {
        !byte >(((255-idx)*(255-idx))/4)
}
; Diagram of the additions
;                  y1    y0
;             x    x1    x0
;               -----------
;               x0y0h x0y0l
; +       x0y1h x0y1l
; +       x1y0h x1y0l
; + x1y1h x1y1l
; -------------------------
;      z3    z2    z1    z0
; umult16 - 16 bit x 16 bit unsigned multiply, 32 bit result.
;
; Each of the four 8x8 partial products is formed by table lookup as
;   a*b = sqr[a+b] - negsqr[(a eor $ff)+b]
; where the tables hold n*n/4 and (255-n)*(255-n)/4, and the four
; products are then summed column by column.
;
; In:    x0,x1 = multiplier (low, high); y0,y1 = multiplicand (low, high)
;        init must have stored the table pages in the pointer high bytes.
; Out:   z0 = product byte 0, Y = byte 1, A = byte 2, z3 = byte 3
; Uses:  X; the low bytes of p_sqr_hi1, p_sqr_hi2, p_neg_sqr_lo and
;        p_neg_sqr_hi; the immediate operands of the four adc instructions
;        at x0y1l, x1y0h, x1y0l and x1y1l (self-modifying code, so this
;        routine has to execute from writable memory).
;        x0, x1, y0 and y1 are only read.
; NOTE(review): no cld is executed here; the adc/sbc arithmetic presumably
;        relies on the caller having decimal mode off - confirm.
umult16
; set multiplier as x1
; (p_sqr_lo2 already points at sqrlo+x1 because x1 is its low byte)
lda x1
sta p_sqr_hi1
eor #$ff
sta p_neg_sqr_lo
sta p_neg_sqr_hi
; set multiplicand as y0
ldy y0
; x1y0l = low(x1*y0)
; x1y0h = high(x1*y0)
; This is the only sec in the routine. Every 16-bit subtraction below
; yields a non-negative product, so it finishes without a borrow and
; leaves carry set for the next one.
sec
lda (p_sqr_lo2),y
sbc (p_neg_sqr_lo),y
sta x1y0l+1 ; patch operand of the adc at x1y0l
lda (p_sqr_hi1), y
sbc (p_neg_sqr_hi),y
sta x1y0h+1 ; patch operand of the adc at x1y0h
; set multiplicand as y1
ldy y1
; x1y1l = low(x1*y1)
; z3 = high(x1*y1)
lda (p_sqr_lo2),y
sbc (p_neg_sqr_lo),y
sta x1y1l+1 ; patch operand of the adc at x1y1l
lda (p_sqr_hi1),y
sbc (p_neg_sqr_hi),y
sta z3
; set multiplier as x0
; (p_sqr_lo1 already points at sqrlo+x0 because x0 is its low byte)
lda x0
sta p_sqr_hi2
eor #$ff
sta p_neg_sqr_lo
sta p_neg_sqr_hi
; Y still holds y1 here
; x0y1l = low(x0*y1)
; X = high(x0*y1)
lda (p_sqr_lo1),y
sbc (p_neg_sqr_lo),y
sta x0y1l+1 ; patch operand of the adc at x0y1l
lda (p_sqr_hi2),y
sbc (p_neg_sqr_hi),y
tax
; set multiplicand as y0
ldy y0
; z0 = low(x0*y0)
; A = high(x0*y0)
lda (p_sqr_lo1),y
sbc (p_neg_sqr_lo),y
sta z0
lda (p_sqr_hi2),y
sbc (p_neg_sqr_hi),y
clc ; start the column additions with no carry in
; The #0 operands of the adc instructions below are placeholders that
; were overwritten with partial-product bytes by the code above.
do_adds
; add the first two numbers of column 1
x0y1l
adc #0 ; x0y0h + x0y1l
tay ; park the column 1 partial sum in Y
; continue to first two numbers of column 2
txa
x1y0h
adc #0 ; x0y1h + x1y0h (+ carry out of column 1)
tax ; X=z2 so far
bcc + ; carry is clear on this path, as the next adc needs
inc z3 ; column 3
clc ; carry was consumed by the inc; clear it for the next adc
; add last number of column 1
+
tya
x1y0l
adc #0 ; + x1y0l
tay ; Y=z1
; add last number of column 2
txa
x1y1l
adc #0 ; + x1y1l (+ carry out of column 1)
bcc fin ; A=z2
inc z3 ; column 3
fin
rts
; init - one-time setup; call once before the first umult16.
; Writes the page number (high address byte) of each square table into the
; high byte of the zero-page pointers that address it. The pointer low bytes
; are left alone: they are filled in per multiplication.
; Uses: A
; Possible savings: the pointer values could be set up in a loop to save
; memory, and the square tables could be generated by code rather than
; loaded.
init
        lda #>sqrlo             ; both low-byte-of-square pointers
        sta p_sqr_lo1+1
        sta p_sqr_lo2+1
        lda #>sqrhi             ; both high-byte-of-square pointers
        sta p_sqr_hi2+1
        sta p_sqr_hi1+1
        lda #>negsqrlo          ; shared "negative" table pointer, low bytes
        sta p_neg_sqr_lo+1
        lda #>negsqrhi          ; shared "negative" table pointer, high bytes
        sta p_neg_sqr_hi+1
        rts