This repository has been archived by the owner on Feb 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
RE_ReaderWriter.cpp
196 lines (187 loc) · 5.82 KB
/
RE_ReaderWriter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/**
* @file RE_ReaderWriter.cpp
* Implementation of the reader and writer class for regular expressions.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
#include "RE_ReaderWriter.hpp"
#include "RE_TreeNode.hpp"
#include <cstring>
#include <string>
#include <iostream>
#include <fstream>
/**
 * Reads a file and builds the regular expression stored in it.
 * The file is expected to contain a single line holding the regular expression.
 * Line terminators ('\n' and '\r') at the end of the file content are removed
 * before the expression is constructed: parseLiteral() does not treat them as
 * delimiters, so a trailing newline left by a text editor would otherwise be
 * handed on as part of the expression text.
 * If the file cannot be opened, nothing is read and the expression is
 * constructed from an empty string.
 * @param filename The path to the input file. The path is relative to the working directory, but can also be defined absolutely.
 * @see RegularExpression
 * @author Daniel Dreibrodt, Konstantin Steinmiller
 * @return A newly allocated regular expression built from the file content.
 */
RegularExpression *REReaderWriter::read(string filename) {
    ifstream file(filename.c_str());
    // Slurp the complete stream; the extra parentheses avoid the most vexing parse.
    string str((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());

    // Drop trailing "\n" / "\r\n" so they do not become part of the expression.
    const std::string::size_type lastKept = str.find_last_not_of("\r\n");
    if (lastKept == std::string::npos) {
        // Empty content, or content consisting of line terminators only.
        str.clear();
    } else {
        str.erase(lastKept + 1);
    }

    return new RegularExpression(str);
}
/**
 * Builds a regular expression from its textual representation.
 * @author Daniel Dreibrodt, Konstantin Steinmiller
 * @param str The null-terminated string representing the regular expression.
 * @return A newly allocated regular expression constructed from the given string.
 */
RegularExpression *REReaderWriter::parse(const char str[]) {
    // Copy the C string into a string object, which is what the constructor is given.
    string expressionText(str);
    RegularExpression *expression = new RegularExpression(expressionText);
    return expression;
}
/**
 * Parses a regular expression string starting at an operand position and
 * builds a regular expression tree. The root node of that tree is returned.
 *
 * Leading blanks are skipped. An opening parenthesis starts a group whose
 * content is parsed recursively; any other non-operator character starts a
 * literal. The parsed operand is then handed to the overload that takes a
 * left subtree, which consumes whatever follows it.
 *
 * Throws a C string message (const char*) when the next character is
 * ')', '|', '.' or '*', because none of them may start an operand.
 *
 * @param string The regular expression string.
 * @param pos The position of the parser in the regular expression; advanced past everything consumed.
 * @param len The length of the whole regular expression string.
 * @return The root node of the expression tree, or NULL if the end of the string was reached before any operand.
 * @author Daniel Dreibrodt, Konstantin Steinmiller
 */
RETreeNode *REReaderWriter::parseNode(const char string[], int *pos, int len) {
    // Blanks carry no meaning in front of an operand.
    while (*pos < len && string[*pos] == ' ') {
        ++(*pos);
    }

    if (*pos >= len) {
        // End of string reached.
        return NULL;
    }

    const char current = string[*pos];

    // Characters that must follow an operand cannot start one.
    if (current == ')') {
        throw "Read ')' but that is not allowed here!";
    }
    if (current == '|') {
        throw "Read '|' but that is not allowed here!";
    }
    if (current == '.') {
        throw "Read '.' but that is not allowed here!";
    }
    if (current == '*') {
        throw "Read '*' but that is not allowed here!";
    }

    RETreeNode *operand;
    if (current == '(') {
        // Step over the parenthesis and parse the group content.
        ++(*pos);
        operand = parseNode(string, pos, len);
    } else {
        operand = parseLiteral(string, pos, len);
    }

    // Continue with whatever follows the operand.
    return parseNode(operand, string, pos, len);
}
/**
 * Parses the part of a regular expression string that follows an already
 * parsed operand and builds a regular expression tree with the given left
 * subtree. The root node of the new tree is returned.
 *
 * Handling per character:
 *  - ')' is consumed and ends parsing at this level.
 *  - '|' and '.' create an operator node whose left child is the current
 *    subtree and whose right child is the result of parsing the remainder.
 *  - '*' wraps the current subtree in a star node; parsing then continues.
 *  - ' ' is skipped.
 *  - any other character starts a literal.
 *
 * Throws a C string message (const char*) when '(' is encountered.
 *
 * @param left The left subtree.
 * @param string The regular expression string.
 * @param pos The position of the parser in the regular expression; advanced past everything consumed.
 * @param len The length of the whole regular expression string.
 * @return The new root node of the expression tree.
 * @author Daniel Dreibrodt, Konstantin Steinmiller
 */
RETreeNode *REReaderWriter::parseNode(RETreeNode *left, const char string[], int *pos, int len) {
    RETreeNode *root = left;

    while (*pos < len) {
        const char symbol = string[*pos];

        if (symbol == '(') {
            throw "Read '(' but that is not allowed here!";
        }

        if (symbol == ')') {
            // Consume the parenthesis; the subtree built so far is the result.
            ++(*pos);
            break;
        }

        if (symbol == '|' || symbol == '.') {
            RETreeNode *binary = (symbol == '|') ? new RETreeNode("|") : new RETreeNode(".");
            ++(*pos);
            binary->setLeft(root);
            // Everything after the operator becomes the right operand.
            binary->setRight(parseNode(string, pos, len));
            return binary;
        }

        if (symbol == '*') {
            // Unary operator: only a left child is set, then parsing goes on.
            RETreeNode *star = new RETreeNode("*");
            ++(*pos);
            star->setLeft(root);
            root = star;
            continue;
        }

        if (symbol == ' ') {
            ++(*pos);
            continue;
        }

        // NOTE(review): a literal directly following an operand replaces the
        // subtree built so far; the previous subtree is neither linked into
        // the result nor released here. Confirm whether this is intended.
        root = parseLiteral(string, pos, len);
    }

    // Either the end of the string was reached or a ')' closed this level.
    return root;
}
/**
 * Parses a literal from a regular expression string.
 * The character at the start position is always taken; further characters
 * are appended until the end of the input or a delimiter is reached.
 * Delimiters are the space character and the characters '.', '|', '*', '('
 * and ')'. The delimiter itself is not consumed.
 * The caller must ensure that the start position lies inside the input.
 * @param str The input string
 * @param pos The parsing position; on return it refers to the first character after the literal
 * @param len The input string length
 * @return A newly allocated node containing the literal.
 */
RETreeNode *REReaderWriter::parseLiteral(const char str[], int *pos, int len) {
    string literal;
    bool keepReading;

    do {
        literal += str[*pos];
        ++(*pos);

        keepReading = (*pos < len);
        if (keepReading) {
            // Spaces and operators are not allowed in literals.
            switch (str[*pos]) {
                case ' ':
                case '.':
                case '|':
                case '*':
                case '(':
                case ')':
                    keepReading = false;
                    break;
                default:
                    break;
            }
        }
    } while (keepReading);

    return new RETreeNode(literal);
}
/**
 * Produces the textual form of the given regular expression by delegating
 * to its toString() member.
 * @param re The regular expression; it is dereferenced without a null check.
 * @return The string representation of the regular expression.
 * @author Daniel Dreibrodt
 */
string REReaderWriter::writeToString(RegularExpression *re) {
    string representation = re->toString();
    return representation;
}
/**
 * Stores the string representation of the given regular expression in a file.
 * The file is opened in binary mode and the representation is written without
 * any additional characters. Stream errors are not reported.
 * @param re The regular expression.
 * @param filename The path to the output file.
 * @author Daniel Dreibrodt
 */
void REReaderWriter::writeToFile(RegularExpression *re, const char *filename) {
    std::ofstream output;
    // Open first, serialise afterwards.
    output.open(filename, std::ios_base::binary);
    output << writeToString(re);
    output.close();
}