forked from felix-lang/dypgen
-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
dyp2gram.pl
120 lines (77 loc) · 2.68 KB
/
dyp2gram.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl
# small utility perl script to generate a human readable grammar from a dypgen
# file...
# Probably full of bugs!
# Print a usage message and exit unless exactly two arguments were given.
if (scalar(@ARGV) != 2) {
    print "usage: $0 input.dyp output.txt\n";
    exit -1;
}

# The input and output paths are package globals: later parts of the script
# (for instance the final status message) refer to them by name.
our ($dypgen_file, $output_file) = @ARGV;

# Three-argument open keeps characters such as '<', '>', '|' or surrounding
# whitespace in a file name from being interpreted as part of the open mode.
# The bareword handles DYP_FILE and GRAM_FILE are used by the rest of the
# script, so their names are kept.  $! is reported to say why an open failed.
open(DYP_FILE, '<', $dypgen_file)
    or die "Can't open $dypgen_file: $!\n";
open(GRAM_FILE, '>', $output_file)
    or die "Can't open $output_file: $!\n";
## Remove the OCaml preamble found at the start of the dypgen file.
## The preamble is recognised by a '{' opening the first line; every line up
## to and including the first one that ends with '}' is discarded from
## DYP_FILE.
## Returns 0 when a preamble was removed, 1 when the file does not start with
## one (the handle is then rewound so that the first line is not lost), and
## an empty string when the end of file is reached before the closing '}'.
sub remove_caml_preamble {
    # Remember where we are so the first line can be "un-read" if needed.
    my $start = tell(DYP_FILE);
    my $line  = <DYP_FILE>;

    # Empty file, or a first line that does not open a preamble.
    if (!defined $line || $line !~ /^\s*\{/) {
        seek(DYP_FILE, $start, 0) if defined $line && $start >= 0;
        return 1;
    }

    # Preamble opened and closed on the very same line.
    return 0 if $line =~ /\}\s*$/;

    while (defined($line = <DYP_FILE>)) {
        return 0 if $line =~ /\}\s*$/;
    }

    # Unterminated preamble: the whole file has been consumed.
    return "";
}
## Copy the declaration section (everything before the grammar rules) from
## DYP_FILE to GRAM_FILE, rewriting %start and %token lines on the way.
## Reading stops at the first line that begins with '%%', '%parser' or
## '%lexer'; that line is consumed and 0 is returned.  An empty string is
## returned if the end of file is reached first.
## Tokens annotated with "-> 'text'" are recorded in the global hash
## %substitution (token name => replacement text), which the rule-processing
## loop reads later.
sub process_preamble {
    our %substitution;

    # Common start of every annotated token line:
    #   %token [<type>] NAME /*
    # The token name is always capture group 1.
    my $token_head = qr/%token\s+(?:<.*>\s*)?(\w+)\s+\/\*\s*/;

    while (defined(my $line = <DYP_FILE>)) {
        # End of preamble.  The alternatives are grouped so that the '^'
        # anchor applies to all of them and not only to '%%'.
        if ($line =~ /^(?:%%|%parser|%lexer)/) {
            return 0;
        }
        # Starting rule: drop the <type> annotation.
        elsif ($line =~ /^%start/) {
            $line =~ s/\s*<.*>\s*/ /;
            print GRAM_FILE $line;
        }
        # Token with both a definition and a substitution:
        #   /* := 'def' -> 'subst' */
        elsif ($line =~ /${token_head}:=\s*'(.*)'\s+->\s*'(.*)'\s*\*\/\s*$/) {
            my ($token, $def, $subst) = ($1, $2, $3);
            $substitution{$token} = $subst;
            print GRAM_FILE "%token $token := $def\n";
        }
        # Token with a definition only:  /* := 'def' */
        elsif ($line =~ /${token_head}:=\s*'(.*)'.*\*\/\s*$/) {
            my ($token, $def) = ($1, $2);
            print GRAM_FILE "%token $token := $def\n";
        }
        # Token with a substitution only:  /* -> 'subst' */
        # Nothing is printed for it; it is only remembered for later.
        elsif ($line =~ /${token_head}->\s*'(.*)'.*\*\/\s*$/) {
            my ($token, $subst) = ($1, $2);
            $substitution{$token} = $subst;
        }
        # Everything else is copied unchanged.
        else {
            print GRAM_FILE $line;
        }
    }

    # End of file reached without finding the end-of-preamble marker.
    return "";
}
## Skip the remainder of a semantic action that spans several lines.
## Lines are consumed from DYP_FILE until one is found whose first '}' is
## followed either by nothing but whitespace (returns "") or by a word
## (returns that word followed by a newline so the caller can keep it).
## Lines whose first '}' is followed by anything else are skipped as well.
## Returns "" if the end of file is reached before the action is closed.
sub remove_action {
    while (defined(my $line = <DYP_FILE>)) {
        return ""     if $line =~ /^[^}]*\}\s*$/;
        return "$1\n" if $line =~ /^[^}]*\}\s*(\w+)\s*/;
    }

    # Unterminated action: hand back a defined, empty result rather than the
    # unspecified value of a sub whose last statement is a loop.
    return "";
}
## Skip the remainder of a comment that spans several lines.
## Lines are consumed from DYP_FILE until one contains the closing '*/'
## followed either by nothing but whitespace (returns "") or by a word
## (returns that word followed by a newline so the caller can keep it).
## Returns "" if the end of file is reached before the comment is closed.
sub remove_comment {
    while (defined(my $line = <DYP_FILE>)) {
        # No space is required before the terminator, so a comment closed
        # as "...--*/" is recognised just like one closed as "... */".
        return ""     if $line =~ /^.*\*\/\s*$/;
        return "$1\n" if $line =~ /^.*\*\/\s*(\w+)\s*/;
    }

    # Unterminated comment: hand back a defined, empty result rather than
    # the unspecified value of a sub whose last statement is a loop.
    return "";
}
# Main pass: strip the OCaml preamble, convert the declaration section, then
# copy the grammar rules while removing semantic actions and hidden comments.
remove_caml_preamble();
process_preamble();

# Build one pattern that matches any token having a display string.
# Replacing all tokens in a single pass keeps the result independent of hash
# ordering and never re-substitutes text that was just inserted.  \b is a
# zero-width boundary, so adjacent tokens ("PLUS PLUS") and tokens at the
# start of a line are all replaced.
my $subst_re;
my @subst_tokens = sort keys %substitution;
if (@subst_tokens) {
    my $alternatives = join '|', map { quotemeta } @subst_tokens;
    $subst_re = qr/\b($alternatives)\b/;
}

while (defined(my $line = <DYP_FILE>)) {
    # A line holding nothing but an action: drop it entirely.
    if ($line =~ /^(\s*)\{.*\}\s*$/) {
        $line = "";
    }
    # Action opened and closed on this line: keep the text on both sides.
    elsif ($line =~ /^([^{]*)\{.*\}(.*)$/) {
        $line = "$1 $2\n";
    }
    # Action continuing on the following lines: keep what precedes it plus
    # whatever remove_action() reports after the closing brace.
    elsif ($line =~ /(.*)\{[^}]*/) {
        # Copy the capture before calling a sub that runs its own matches.
        my $before = $1;
        $line = $before . remove_action();
        next if $line =~ /^\s*$/;
    }
    # Hidden comment (opened with "/*--") continuing on the following lines.
    elsif ($line =~ /(.*)\/\*--/) {
        my $before = $1;
        $line = $before . remove_comment();
    }

    # Show tokens by their quoted display string.
    $line =~ s/$subst_re/"$substitution{$1}"/g if defined $subst_re;

    print GRAM_FILE $line;
}

close(DYP_FILE);
# Buffered write errors only surface when the output handle is closed.
close(GRAM_FILE) or die "Can't write $output_file: $!\n";
print "Grammar generated in $output_file.\n";
exit 0;