-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path_parser.s
1637 lines (1365 loc) · 26.8 KB
/
_parser.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
.include "src/zif.inc"
.include "src/_globals.inc"
.section .zp, "zax", @nobits
; Parser temporaries
parsecmd: .word 0 ; points into parser bytecode
parsefailstackptr: .byte 0 ; stack pointer to rewind to
parsestartstackptr: .byte 0 ; stack pointer when parse starts
bytecodeptr: .byte 0 ; offset into bytecode buffer (TODO: remove)
.data 11
pop_exec_hi:
.data 12
pop_exec_lo:
.macro defpop exec
.data 11
.byte (\exec-1)@mos16hi
.data 12
.byte (\exec-1)@mos16lo
.endm
P_TRY_OP = 0
P_JSR_OP = 1
P_JMP_OP = 2
P_FAIL_OP = 3
P_PANIC_OP = 4
P_RETURN_OP = 5
P_COMMIT_OP = 6
P_TOKEN_OP = 7
P_EAGER_TOKEN_OP = 8
P_EMIT_OP = 9
P_EMIT4_OP = 10
P_SHUNT_OP = 11
P_FLUSH_OP = 12
P_WORD_OP = 13
P_NUMBER_OP = 14
P_EOL_OP = 15
P_PAREN_OP = 16
P_SUCCEED_OP = 17
P_STRING_OP = 18
P_PARAM_OP = 19
P_FLUSH_AND_EMIT_VARARG_OP = 20
defpop pop_try
defpop pop_jsr
defpop pop_jmp
defpop pop_fail
defpop pop_panic
defpop pop_return
defpop pop_commit
defpop pop_token
defpop pop_eager_token
defpop pop_emit
defpop pop_emit4
defpop pop_shunt
defpop pop_flush
defpop pop_word
defpop pop_number
defpop pop_eol
defpop pop_paren
defpop pop_succeed
defpop pop_string
defpop pop_param
defpop pop_flush_and_emit_vararg
zproc parse_line
lda #0
sta bytecode_buffer+LINE_NUMBER+0
sta bytecode_buffer+LINE_NUMBER+1
sta bytecode_buffer+LINE_INDENT
ldx #LINE_DATA
stx bytecodeptr
ldx #0
stx opstackptr
tsx
stx parsestartstackptr
lda #<immediate_statement_rule
sta parsecmd+0
lda #>immediate_statement_rule
sta parsecmd+1
lda #<pop_succeed_ptr
pha
lda #>pop_succeed_ptr
pha
zendproc
; fall through
zproc parse_state_machine
ldy #0
lda (parsecmd), y
tax
lda pop_exec_hi, x
pha
lda pop_exec_lo, x
pha
.if 0
tsx
txa
jsr print_hex8_number
jsr print_space
lda parsecmd+0
ldx parsecmd+1
jsr print_hex16_number
jsr print_nl
.endif
rts
zendproc
; Adds A to parsecmd.
; Preserves X and Y.
parse_advance_3:
lda #3
bne parse_advance ; always taken
parse_advance_2:
lda #2
bne parse_advance ; always taken
parse_advance_1:
lda #1
zproc parse_advance
clc
adc parsecmd+0
sta parsecmd+0
zif_cs
inc parsecmd+1
zendif
rts
zendproc
parse_next_2:
lda #2
bne parse_next ; always taken
parse_next_1:
lda #1
zproc parse_next
jsr parse_advance
jmp parse_state_machine
zendproc
; Pushes A onto the bytecode queue.
; Preserves Y.
zproc queue_bytecode
ldx bytecodeptr
sta bytecode_buffer, x
inc bytecodeptr
rts
zendproc
; Shunts the operator A through the shunting yard algorithm.
zproc shunt_operator
tay ; Y contains operator throughout
jmp 1f
zrepeat
lda operator_stack, x
zbreakif_eq ; if TOS is 0, it's a parenthesis
tax
lda token_precedence_table, x ; priority of operator
cmp token_precedence_table, y ; priority of TOS
zbreakif_cc ; stack if TOS < operator
dec opstackptr
txa
jsr queue_bytecode
1:
ldx opstackptr
zuntil_eq
; Stack the operator onto the operator stack.
stack_operator:
ldx opstackptr
inx
tya
sta operator_stack, x
stx opstackptr
rts
zendproc
; Unstacks operators down to and including a parenthesis marker (or the stack
; is empty).
;
; Returns:
; A = the parameter count, if this is a parameter list
zproc unstack_operators
zloop
ldx opstackptr
zif_eq
rts
zendif
lda operator_stack, x
zif_eq ; parenthesis marker
lda operator_stack-1, x
dec opstackptr
dec opstackptr
rts
zendif
dec opstackptr
jsr queue_bytecode
zendloop
zendproc
pop_succeed_ptr:
.byte P_SUCCEED_OP
zproc pop_succeed
jsr unstack_operators
lda #TOKEN_EOL
ldx bytecodeptr
sta bytecode_buffer, x
inx
stx bytecode_buffer+LINE_SIZE
ldx parsestartstackptr
txs
rts
zendproc
zproc pop_panic
ldx parsestartstackptr
txs
lda #<1f
ldx #>1f
jsr print_string
ldx text_buffer+1
zif_ne
lda #0
sta p0+0
zrepeat
ldx p0+0
lda text_buffer+2, x
jsr platform_putchar
inc p0+0
ldx p0+0
cpx text_buffer+1
zuntil_eq
jsr print_nl
lda #2
sta p0+0
zloop
ldx p0+0
cpx bufferpos
zbreakif_eq
jsr print_space
inc p0+0
zendloop
lda #'^'
jsr platform_putchar
jsr print_nl
zendif
jmp error_return
1:
.ascii "\r\nSyntax error\r"
.byte '\n'|0x80
zendproc
zproc pop_try
; Read parameter into p0.
ldy #1
lda (parsecmd), y
sta p0+0
iny
lda (parsecmd), y
sta p0+1
; Advance.
jsr parse_advance_3
.if 0
lda #'>'
jsr platform_putchar
lda parsecmd+0
ldx parsecmd+1
jsr print_hex16_number
jsr print_nl
.endif
; Push backtracking state.
lda parsefailstackptr
pha
lda parsecmd+0
pha
lda parsecmd+1
pha
lda bufferpos
pha
lda bytecodeptr
pha
lda opstackptr
pha
tsx
stx parsefailstackptr
; Push the marker indicating that this is a backtracking state.
lda #0
pha
pha
; Switch to rule being tried.
lda p0+0
sta parsecmd+0
lda p0+1
sta parsecmd+1
jmp parse_state_machine
zendproc
; We can't backtrack any more --- if we get a parse failure, panic.
zproc pop_commit
lda parsefailstackptr
pha
lda #<pop_panic_ptr
pha
lda #>pop_panic_ptr
pha
lda bufferpos
pha
lda bytecodeptr
pha
lda opstackptr
pha
tsx
stx parsefailstackptr
; Push the marker indicating that this is a backtracking state.
lda #0
pha
pha
jmp parse_next_1
zendproc
pop_panic_ptr:
.byte P_PANIC_OP
; Revert to a backtracking state.
zproc pop_fail
; Revert the state.
.if 0
lda #'<'
jsr platform_putchar
jsr print_nl
.endif
ldx parsefailstackptr
txs
pla
sta opstackptr
pla
sta bytecodeptr
pla
sta bufferpos
pla
sta parsecmd+1
pla
sta parsecmd+0
pla
sta parsefailstackptr
jmp parse_state_machine
zendproc
zproc pop_jsr
; Read parameter into p0.
ldy #1
lda (parsecmd), y
sta p0+0
iny
lda (parsecmd), y
sta p0+1
; Advance.
jsr parse_advance_3
; Push return point.
lda parsecmd+0
pha
lda parsecmd+1
pha
; Jump to the new rule.
lda p0+0
sta parsecmd+0
lda p0+1
sta parsecmd+1
jmp parse_state_machine
zendproc
zproc pop_jmp
; Read parameter into parsecmd.
ldy #1
lda (parsecmd), y
tax
iny
lda (parsecmd), y
stx parsecmd+0
sta parsecmd+1
jmp parse_state_machine
zendproc
zproc pop_return
; Pop the return point.
pla
sta parsecmd+1
pla
sta parsecmd+0
; Was this a backtracking marker?
lda parsecmd+0
ora parsecmd+1
zif_eq
; It was. Discard the backtracking state and return again.
pla
pla
pla
pla
pla
pla
sta parsefailstackptr
jmp pop_return
zendif
jmp parse_state_machine
zendproc
zproc pop_shunt
ldy #1
lda (parsecmd), y
jsr shunt_operator
jmp parse_next_2
zendproc
zproc pop_paren
ldy #0
jsr stack_operator
jsr stack_operator
jmp parse_next_1
zendproc
zproc pop_param
ldx opstackptr
lda operator_stack, x
zif_ne
brk
zendif
inc operator_stack-1, x
zif_eq
jmp error_invalid_parameter
zendif
jmp parse_next_1
zendproc
zproc pop_flush
jsr unstack_operators
jmp parse_next_1
zendproc
zproc pop_flush_and_emit_vararg
jsr unstack_operators
pha
ldy #1
lda (parsecmd), y
jsr queue_bytecode
pla
jsr queue_bytecode
jmp parse_next_2
zendproc
zproc parser_skip_whitespace
zloop
ldx bufferpos
lda text_buffer, x
cmp #' '
zbreakif_ne
inc bufferpos
zendloop
rts
zendproc
; Fetch an identifier and set p0/p1 to the atom pointer and ID.
zproc parser_fetch_word
jsr parser_skip_whitespace
lda bufferpos
sta bufferend
ldx bufferend
lda text_buffer, x
jsr is_first_letter_of_word
zif_cs
zrepeat
inc bufferend
ldx bufferend
lda text_buffer, x
jsr is_subsequent_letter_of_word
zuntil_cc
lda text_buffer, x
cmp #'#'
beq 1f
cmp #'$'
zif_eq
1:
inc bufferend
zendif
zendif
lda bufferpos
cmp bufferend
beq 1f
jsr get_atom
lda bufferend
sta bufferpos
rts
1:
jmp pop_fail
zendproc
; Used by pop*token. Leaves the buffer position in X.
zproc skip_token
jsr parser_skip_whitespace
ldx bufferpos
dex
ldy #0 ; 1 - 1
zrepeat
inx
iny
lda text_buffer, x
jsr toupper
eor (parsecmd), y
and #0x7f
zif_ne
jmp pop_fail
zendif
lda (parsecmd), y
zuntil_mi
inx
stx bufferpos
rts
zendproc
zproc pop_token
jsr skip_token
; Check that this is the end of a word.
lda text_buffer, x
jsr is_subsequent_letter_of_word
zif_cs
jmp pop_fail
zendif
1:
iny
tya
jsr parse_advance
jmp parse_state_machine
zendproc
zproc pop_eager_token
jsr skip_token
jmp 1b
zendproc
zproc pop_emit
ldy #1
lda (parsecmd), y
jsr queue_bytecode
jmp parse_next_2
zendproc
zproc pop_emit4
ldy #1
lda (parsecmd), y
jsr queue_bytecode
ldy #3
zrepeat
lda #0
jsr queue_bytecode
dey
zuntil_eq
jmp parse_next_2
zendproc
zproc pop_word
jsr parser_fetch_word
ldy #1
lda (parsecmd), y
jsr queue_bytecode
sta bytecode_buffer, x
lda p1+0
jsr queue_bytecode
lda p1+1
jsr queue_bytecode
jmp parse_next_2
zendproc
zproc pop_eol
jsr parser_skip_whitespace
ldx bufferpos
lda text_buffer, x
zif_ne
jmp pop_fail
zendif
jmp parse_next_1
zendproc
zproc emit_i0
lda i0+1
ora i0+2
ora i0+3
zif_eq
; Top three bytes are zero.
lda i0+0
zif_eq
lda #TOKEN_ZERO
jmp queue_bytecode
zendif
cmp #1
zif_eq
lda #TOKEN_ONE
jmp queue_bytecode
zendif
zendif
lda i0+0
and i0+1
and i0+2
and i0+3
cmp #0xff
zif_eq ; is the number minus one?
lda #TOKEN_MONE
jmp queue_bytecode
zendif
lda #TOKEN_INTEGER
jsr queue_bytecode
lda i0+0
jsr queue_bytecode
lda i0+1
jsr queue_bytecode
lda i0+2
jsr queue_bytecode
lda i0+3
jsr queue_bytecode
rts
zendproc
zproc pop_number
jsr buffer_parse_int
zif_cs
1:
jmp pop_fail
zendif
jsr emit_i0
jmp parse_next_1
zendproc
zproc pop_string
jsr parser_skip_whitespace
; Check for the leading double quote.
ldx bufferpos
lda text_buffer, x
cmp #'"'
bne 1b ; fail
; Find the bounds of the string in the text buffer.
zrepeat
inx
lda text_buffer, x
zif_eq
jmp pop_panic
zendif
cmp #'"'
zuntil_eq
stx bufferend
; Emit the bytecode.
lda #TOKEN_STRING
jsr queue_bytecode
inc bufferpos
lda bufferend
sec
sbc bufferpos
jsr queue_bytecode
zloop
ldx bufferpos
cpx bufferend
zbreakif_eq
lda text_buffer, x
jsr queue_bytecode
inc bufferpos
zendloop
inc bufferpos ; skip over trailing double quota
jmp parse_next_1
zendproc
.macro P_TRY rule
.byte P_TRY_OP
.word \rule
.endm
.macro P_JSR rule
.byte P_JSR_OP
.word \rule
.endm
.macro P_JMP rule
.byte P_JMP_OP
.word \rule
.endm
.macro P_FAIL
.byte P_FAIL_OP
.endm
.macro P_PANIC
.byte P_PANIC_OP
.endm
.macro P_RETURN
.byte P_RETURN_OP
.endm
.macro P_COMMIT
.byte P_COMMIT_OP
.endm
.macro P_TOKEN bytes:vararg
.byte P_TOKEN_OP
.byte \bytes
.endm
.macro P_EAGER_TOKEN bytes:vararg
.byte P_EAGER_TOKEN_OP
.byte \bytes
.endm
.macro P_EMIT token
.byte P_EMIT_OP
.byte \token
.endm
.macro P_EMIT4 token
.byte P_EMIT4_OP
.byte \token
.endm
.macro P_SHUNT token
.byte P_SHUNT_OP
.byte \token
.endm
.macro P_FLUSH
.byte P_FLUSH_OP
.endm
.macro P_WORD token
.byte P_WORD_OP
.byte \token
.endm
.macro P_NUMBER
.byte P_NUMBER_OP
.endm
.macro P_EOL
.byte P_EOL_OP
.endm
.macro P_PAREN
.byte P_PAREN_OP
.endm
.macro P_STRING
.byte P_STRING_OP
.endm
.macro P_PARAM
.byte P_PARAM_OP
.endm
.macro P_FLUSH_AND_EMIT_VARARG token
.byte P_FLUSH_AND_EMIT_VARARG_OP
.byte \token
.endm
.macro keyword name
keyword_\name = . + 1
.global keyword_\name
.endm
immediate_statement_rule:
P_TRY lineentry_rule
P_TRY list_rule
P_TRY del_rule
P_TRY renum_rule
P_TRY run_rule
P_TRY con_rule
P_TRY enter_rule
statement_rule:
P_TRY repeat_rule
P_TRY until_rule
P_TRY while_rule
P_TRY endwhile_rule
P_TRY for_rule
P_TRY endfor_rule
P_TRY if_rule
P_TRY else_rule
P_TRY elif_rule
P_TRY endif_rule
P_TRY proc_or_func_rule
P_TRY endproc_rule
P_TRY endfunc_rule
P_TRY eol_rule
simple_statement_rule:
P_TRY print_rule
P_TRY input_rule
P_TRY stop_rule
P_TRY bye_rule
P_TRY end_rule
P_TRY return0_rule
P_TRY return1_rule
P_TRY randomize_rule
P_TRY proccall_rule
P_TRY assignment_rule
P_PANIC
lineentry_rule:
P_NUMBER
P_COMMIT
P_EMIT TOKEN_LINEENTRY
P_JMP statement_rule
list_rule:
keyword list
P_TOKEN 'L', 'I', 'S', 'T'|0x80
P_COMMIT
P_TRY list_to_file_rule
P_TRY empty_list_rule
P_JSR line_range_rule
P_EMIT TOKEN_LIST
P_EOL
P_RETURN
list_to_file_rule:
P_STRING
P_EMIT TOKEN_LISTTOF
P_EOL
P_RETURN
empty_list_rule:
P_EOL
P_EMIT TOKEN_ZERO
P_EMIT TOKEN_ZERO
P_EMIT TOKEN_LIST
P_RETURN
del_rule:
P_TOKEN 'D', 'E', 'L'|0x80
P_COMMIT
P_JSR line_range_rule
P_EMIT TOKEN_DEL
P_EOL
P_RETURN
renum_rule:
P_TOKEN 'R', 'E', 'N', 'U', 'M'|0x80
P_COMMIT
P_JSR first_optional_number_or_zero_rule
P_JSR second_optional_number_or_zero_rule
P_EOL
P_EMIT TOKEN_RENUM
P_RETURN
second_optional_number_or_zero_rule:
P_TRY comma_then_number_rule
P_EMIT TOKEN_ZERO
P_RETURN
first_optional_number_or_zero_rule:
P_TRY number_rule
P_EMIT TOKEN_ZERO
P_RETURN
comma_then_number_rule:
P_EAGER_TOKEN ','|0x80
number_rule:
P_NUMBER
P_RETURN
line_range_rule:
P_TRY open_left_line_range_rule
P_TRY single_line_range_rule
P_NUMBER
P_EAGER_TOKEN '-'|0x80
P_TRY eol_line_range_rule
P_NUMBER
P_RETURN
single_line_range_rule:
P_NUMBER
P_EMIT TOKEN_ZERO
P_EOL
P_RETURN
open_left_line_range_rule:
P_EAGER_TOKEN '-'|0x80
P_EMIT TOKEN_ZERO
P_NUMBER
P_RETURN
eol_line_range_rule:
P_EOL
P_EMIT TOKEN_MONE
P_RETURN
run_rule:
P_TOKEN 'R', 'U', 'N'|0x80
P_EMIT TOKEN_RUN
P_EOL
P_RETURN