-
Notifications
You must be signed in to change notification settings - Fork 2
/
lexer.h
178 lines (169 loc) · 5.88 KB
/
lexer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
//
// Created by Alex
//
#ifndef MYLIBS_LEXER_H
#define MYLIBS_LEXER_H
#include <string_view>
#include <fstream>
#include "regex.h"
namespace alex {
/// Single-pass character iterator over a std::istream.
///
/// A default-constructed iterator is detached from any stream and serves as
/// the end sentinel. An attached iterator reads one character ahead: the
/// constructor and every increment fetch the next character with
/// std::istream::get(), and operator* returns that cached value.
class StreamIter {
    std::istream *stream = nullptr;  // non-owning; nullptr marks the end sentinel
    int chr = 0;                     // last value returned by get(), or 0 when detached

    /// True when no further character can be produced: the iterator is
    /// detached, or the stream reached end-of-file or entered a failed state.
    /// Checking good() rather than only eof() keeps a stream that failed for
    /// another reason (e.g. a file that could not be opened) from looking
    /// like an endless input.
    bool exhausted() const { return stream == nullptr || !stream->good(); }

public:
    using iterator_category = std::input_iterator_tag;
    using value_type = char;
    using difference_type = long;
    using pointer = const char *;
    using reference = char &;

    /// Creates the end sentinel.
    constexpr StreamIter() = default;

    /// Attaches to `os` and immediately reads its first character.
    /// Only a pointer to `os` is stored, so the stream must outlive the
    /// iterator and every copy of it.
    StreamIter(std::istream &os) : stream(&os) {
        ++*this;
    }

    /// True for a detached (default-constructed) iterator.
    bool empty() const { return stream == nullptr; }

    /// True when detached or when the stream has its eofbit set.
    bool eof() const { return stream == nullptr || stream->eof(); }

    /// Returns the cached character as an int (the value get() produced).
    int operator*() const { return chr; }

    /// Fetches the next character; a detached iterator just stores 0.
    StreamIter &operator++() {
        chr = (!empty()) ? stream->get() : 0;
        return *this;
    }

    /// Two iterators are equal when both are exhausted, or when both are
    /// still readable and attached to the same stream. The relation is
    /// symmetric, so `it == end` and `end == it` agree.
    bool operator==(const StreamIter &rhs) const {
        const bool lhs_done = exhausted();
        const bool rhs_done = rhs.exhausted();
        if (lhs_done || rhs_done) {
            return lhs_done && rhs_done;
        }
        return stream == rhs.stream;
    }

    bool operator!=(const StreamIter &rhs) const {
        return !(*this == rhs);
    }
};
/// Owns a file opened for reading and exposes it as a begin()/end() pair of
/// StreamIter.
///
/// The std::fstream member closes the file in its own destructor, so this
/// class declares no destructor of its own; that also leaves the
/// compiler-generated move operations available.
class FileStreamWrapper {
private:
    std::fstream file;
public:
    /// Opens `path` in input mode. Failure is not reported here; query
    /// is_open() afterwards.
    FileStreamWrapper(const char *path) : file(path, std::ios::in) {}

    /// True if the file was opened successfully.
    bool is_open() const { return file.is_open(); }

    /// Returns an iterator positioned on the next unread character.
    /// Each call constructs a new StreamIter, which consumes one character
    /// from the file, so calling begin() again does not restart iteration.
    /// If the stream is not readable (open failed, or input already
    /// exhausted) the end sentinel is returned so loops terminate at once.
    StreamIter begin() {
        if (!file.good()) {
            return StreamIter();
        }
        return StreamIter(this->file);
    }

    /// Returns the detached end sentinel.
    StreamIter end() { return StreamIter(); }
};
/// Forward-moving iterator over a character array.
///
/// A default-constructed iterator holds a null pointer and serves as a
/// "stop at the terminator" sentinel: any iterator positioned on a zero
/// character compares equal to it. Two non-null iterators compare by
/// address, so an explicit [begin, begin + n) range works as well.
template<class char_t = char>
class StringIter {
    const char_t *current = nullptr;  // non-owning; nullptr marks the sentinel

    /// True when the iterator is null or positioned on the zero terminator.
    bool at_terminator() const { return current == nullptr || *current == char_t(); }

public:
    using iterator_category = std::input_iterator_tag;
    using value_type = char_t;
    using difference_type = long;
    using pointer = const char_t *;
    using reference = char_t &;

    /// Creates the sentinel.
    constexpr StringIter() = default;

    /// Positions the iterator on `current`; the pointed-to storage must
    /// outlive the iterator.
    constexpr StringIter(const char_t *current) : current(current) {}

    /// True for the null (sentinel) iterator.
    bool empty() const { return current == nullptr; }

    /// Returns the current character, or zero for the sentinel. The result is
    /// char_t so that wide characters are not truncated to char.
    char_t operator*() const { return current ? *current : char_t(); }

    /// Moves to the next character; the sentinel stays where it is.
    StringIter &operator++() {
        if (current != nullptr) {
            ++current;
        }
        return *this;
    }

    /// Against the sentinel: equal when the other side is at the terminator.
    /// Between two real positions: equal when they point at the same element.
    bool operator==(const StringIter &rhs) const {
        if (rhs.empty()) {
            return at_terminator();
        }
        if (empty()) {
            return rhs.at_terminator();
        }
        return current == rhs.current;
    }

    bool operator!=(const StringIter &rhs) const {
        return !(*this == rhs);
    }
};
template <class iter_t = StringIter<>, class symbol_t = int>
class Lexer {
public:
using RegexGen = RegexGenerator<symbol_t>;
using char_t = typename std::iterator_traits<iter_t>::value_type;
using string_t = std::basic_string<char_t>;
RegexGen generator;
using RegexState = typename RegexGen::State;
using RegexTransition = typename RegexGen::Transition;
std::vector<std::unique_ptr<RegexState>> state_machine;
std::vector<std::unique_ptr<RegexState>> whitespace;
iter_t current;
iter_t last;
string_t lexeme_;
int line_ = 0;
int position_ = 0;
int token_start = 0;
int token_line_start = 0;
symbol_t token_symbol;
public:
Lexer() = default;
void set_whitespace(std::string_view pattern) {
RegexParser parser(pattern.data(), pattern.data() + pattern.size());
RegexGen space_generator;
space_generator.feed(parser.parse_concat(), 1);
whitespace = std::move(space_generator.generate());
}
void add_pattern(std::string_view pattern, symbol_t symbol) {
RegexParser parser(pattern.data(), pattern.data() + pattern.size());
generator.feed(parser.parse_concat(), symbol);
}
void add_literal(std::string_view pattern, symbol_t symbol) {
RegexParser parser(pattern.data(), pattern.data() + pattern.size());
generator.feed(parser.parse_literal(), symbol);
}
void generate_states() {
state_machine = std::move(generator.generate());
}
void reset(iter_t begin, iter_t end) {
this->current = begin;
this->last = end;
}
void advance() {
while (advance_symbol(whitespace[0].get()) != RegexGen::SymbolNull);
token_start = position_;
symbol_t symbol = advance_symbol(state_machine[0].get());
if (symbol == RegexGen::SymbolNull && current != last) {
std::cout << "Unexpect char: " << *current << " line:" << line() << std::endl;
}
token_symbol = symbol;
}
inline string_t &lexeme() { return lexeme_; }
inline symbol_t symbol() { return token_symbol; }
inline int line() { return line_; }
inline int column() { return token_start - token_line_start; }
void dump() {
do {
advance();
std::cout << "{" << lexeme() << "} -> " << int(symbol()) << std::endl;
} while (symbol() != 0);
exit(0);
}
bool eof() {
return current == last;
}
private:
symbol_t advance_symbol(RegexState *begin) {
lexeme_.clear();
RegexState *state = begin;
do {
if (current == last) {
return state->symbol;
}
auto *trans = state->find_trans(*current);
if (trans) {
lexeme_ += *current;
state = trans->state;
++position_;
if (*current == '\n') {
++line_;
token_line_start = position_;
}
++current;
} else {
break;
}
} while (true);
return state->symbol;
}
};
}
#endif //MYLIBS_LEXER_H